A list of strings, where each string is a question or prompt for the AI to validate. Example: ["What is the capital of France?", "Write a short story about bitcoin", "Result of this math question: 2 + 2 = ?"]
responses
List[string]
A list of strings corresponding to the AI-generated answers to the prompts. Example: ["Paris", "A story about bitcoin ...", "2 + 2 = 4"]
ground_truths
List[string]
A list of strings that represent the correct or expected answers to the prompts. Example: ["The capital of France is Paris.", "A short story about bitcoin describes...", "2 + 2 = 4"]
Usage
cURL
curl -X POST 'https://api.flexstack.ai/validator/v1/llm' \
-H 'Content-Type: application/json' \
-d '{
"prompts": ["What is the capital of France?", "Write a short story about bitcoin", "Result of this math question: 2 + 2 = ?"],
"responses": ["Paris", "A story about bitcoin ...", "2 + 2 = 4"],
"ground_truths": ["The capital of France is Paris.", "A short story about bitcoin describes...", "2 + 2 = 4"]
}'
Python (requests)
import requests
from typing import List, Any
class FlexStackAIValidator:
    '''Client for the FlexStack AI validation API.

    Sends prompts, their AI-generated responses, and optional ground-truth
    answers to the validator service and returns the validation results.
    '''

    def __init__(
        self,
        base_url: str = 'https://api.flexstack.ai/validator/v1',
        timeout: float = None,
    ) -> None:
        '''
        Args:
            base_url (str, optional): Root URL of the validator API.
            timeout (float, optional): Per-request timeout in seconds passed
                to ``requests.post``. Defaults to None (no timeout), which
                preserves the previous behavior.
        '''
        # Define the base URL and request timeout
        self.base_url = base_url
        self.timeout = timeout

    def _llm_validator(
        self,
        questions: List[str],
        answers: List[str],
        ground_truths: List[str] = None,
    ):
        '''
        Call the LLM validation endpoint.

        Args:
            questions (List[str]): List of questions.
            answers (List[str]): List of answers.
            ground_truths (List[str], optional): List of ground truth
                responses. Defaults to None, which is sent as an empty list.
                (A None default replaces the original mutable ``[]`` default,
                which is shared across calls in Python.)

        Returns:
            requests.Response: Raw HTTP response from the service.
        '''
        # NOTE(review): these payload keys ('questions'/'answers') differ from
        # the cURL documentation ('prompts'/'responses') — confirm which
        # schema the endpoint actually expects.
        data = {
            'questions': questions,
            'answers': answers,
            'ground_truths': ground_truths if ground_truths is not None else [],
        }
        # POST to the /llm endpoint; timeout=None means wait indefinitely
        return requests.post(
            f"{self.base_url}/llm", json=data, timeout=self.timeout
        )

    def __call__(
        self,
        prompts: List[str],
        responses: List[str],
        ground_truths: List[str] = None,
        ai_type: str = "llm",
    ) -> List[Any]:
        '''
        Validate FlexStack AI responses.

        Args:
            prompts (List[str]): List of prompts.
            responses (List[str]): List of responses by host.
            ground_truths (List[str], optional): List of ground truth
                responses. Defaults to None (treated as empty).
            ai_type (str, optional): Validator to use; only "llm" is
                currently supported. Defaults to "llm".

        Returns:
            List[Any]: Validation results on success; None if the HTTP
            request failed (the error payload is printed).

        Raises:
            ValueError: If ``ai_type`` is not supported.
        '''
        # Guard clause: reject unsupported validator types up front
        if ai_type != "llm":
            raise ValueError("AI type not supported")
        response = self._llm_validator(prompts, responses, ground_truths)
        if response.status_code == 200:
            return response.json()['content']
        # Best-effort error reporting, preserving the original behavior;
        # return None explicitly so the failure path is visible to callers.
        print(f"Error: {response.json()}")
        return None
Examples
# Sample inputs: prompts, the model's responses, and the expected answers
sample_inputs = {
    'prompts': ['What is the capital of France?', 'Write a short story about bitcoin', 'Result of this math question: 2 + 2 = ?'],
    'responses': ['Paris', 'A story about bitcoin ...', '2 + 2 = 4'],
    'ground_truths': ['The capital of France is Paris.', 'A', '2 + 2 = 4'],
}

# Instantiate the validator client with the default API endpoint
validator = FlexStackAIValidator()

# Run validation over the sample inputs and show the results
validation_output = validator(**sample_inputs)
print(validation_output)