OpenAI GPT-4 Integration: Best Practices and Patterns
Learn how to effectively integrate OpenAI GPT-4 into your applications with function calling, embeddings, fine-tuning considerations, and production patterns.
Dibyank Padhy
Engineering Manager & Full Stack Developer
OpenAI's GPT-4 offers powerful capabilities for building intelligent applications. This guide covers production-ready patterns for integrating GPT-4 into your systems, from basic chat to advanced function calling and embeddings.
Setting Up the OpenAI Client
Start with a properly configured client that handles retries and rate limiting:
import OpenAI from 'openai';
import { RateLimiter } from './utils/rate-limiter';
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
maxRetries: 3,
timeout: 30000,
});
// Token-based rate limiter
const rateLimiter = new RateLimiter({
tokensPerMinute: 90000, // example GPT-4 Turbo tier limit; check your account's actual limits
requestsPerMinute: 500,
});
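The RateLimiter import above is a local utility, not part of the SDK. Here is a minimal token-bucket sketch of what it might look like (the class name matches the import; the refill strategy and polling interval are assumptions, not a published implementation):
// utils/rate-limiter.ts -- hypothetical token-bucket limiter sketch
export class RateLimiter {
  private tokens: number;
  private requests: number;

  constructor(private opts: { tokensPerMinute: number; requestsPerMinute: number }) {
    this.tokens = opts.tokensPerMinute;
    this.requests = opts.requestsPerMinute;
    // Refill both buckets once per second, capped at the per-minute budget
    setInterval(() => {
      this.tokens = Math.min(this.tokens + opts.tokensPerMinute / 60, opts.tokensPerMinute);
      this.requests = Math.min(this.requests + opts.requestsPerMinute / 60, opts.requestsPerMinute);
    }, 1000).unref?.();
  }

  // Resolves once the estimated token cost and one request slot are available
  async acquire(estimatedTokens: number): Promise<void> {
    while (this.tokens < estimatedTokens || this.requests < 1) {
      await new Promise(resolve => setTimeout(resolve, 250));
    }
    this.tokens -= estimatedTokens;
    this.requests -= 1;
  }
}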
interface ChatOptions {
model?: string;
temperature?: number;
maxTokens?: number;
systemPrompt?: string;
}
async function chat(
messages: OpenAI.Chat.ChatCompletionMessageParam[],
options: ChatOptions = {}
): Promise<string> {
const {
model = 'gpt-4-turbo-preview',
temperature = 0.7,
maxTokens = 4096,
systemPrompt,
} = options;
// Estimate tokens and wait for rate limiter
const estimatedTokens = estimateTokens(messages);
await rateLimiter.acquire(estimatedTokens);
const allMessages: OpenAI.Chat.ChatCompletionMessageParam[] = systemPrompt
? [{ role: 'system', content: systemPrompt }, ...messages]
: messages;
const response = await openai.chat.completions.create({
model,
messages: allMessages,
temperature,
max_tokens: maxTokens,
});
return response.choices[0]?.message?.content || '';
}
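A quick usage example (the prompt text is illustrative):
const reply = await chat(
  [{ role: 'user', content: 'Summarize the key risks in this earnings report.' }],
  { systemPrompt: 'You are a concise financial analyst.', temperature: 0.3 }
);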
// Simple token estimation (for more accuracy, use tiktoken)
function estimateTokens(messages: OpenAI.Chat.ChatCompletionMessageParam[]): number {
return messages.reduce((total, msg) => {
const content = typeof msg.content === 'string' ? msg.content : '';
return total + Math.ceil(content.length / 4);
}, 0);
}
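For exact counts, a real tokenizer can replace the heuristic. A sketch using the js-tiktoken package (an assumed dependency; the +4 per-message overhead is an approximation of chat formatting costs):
import { encodingForModel } from 'js-tiktoken';

function countTokens(messages: OpenAI.Chat.ChatCompletionMessageParam[]): number {
  const enc = encodingForModel('gpt-4');
  return messages.reduce((total, msg) => {
    const content = typeof msg.content === 'string' ? msg.content : '';
    // Encode the text and add a rough per-message formatting overhead
    return total + enc.encode(content).length + 4;
  }, 0);
}
Function Calling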
GPT-4's function calling is powerful for structured interactions:
// Define functions
const functions: OpenAI.Chat.ChatCompletionCreateParams.Function[] = [
{
name: 'get_stock_price',
description: 'Get the current stock price for a given ticker symbol',
parameters: {
type: 'object',
properties: {
ticker: {
type: 'string',
description: 'Stock ticker symbol, e.g., AAPL, GOOGL',
},
include_history: {
type: 'boolean',
description: 'Include price history for the last 30 days',
},
},
required: ['ticker'],
},
},
{
name: 'execute_trade',
description: 'Execute a stock trade. Use only after user confirmation.',
parameters: {
type: 'object',
properties: {
ticker: { type: 'string' },
action: { type: 'string', enum: ['buy', 'sell'] },
quantity: { type: 'number' },
order_type: {
type: 'string',
enum: ['market', 'limit'],
description: 'Order type - market or limit',
},
limit_price: {
type: 'number',
description: 'Required if order_type is limit',
},
},
required: ['ticker', 'action', 'quantity', 'order_type'],
},
},
{
name: 'analyze_portfolio',
description: 'Analyze the user\'s portfolio performance',
parameters: {
type: 'object',
properties: {
period: {
type: 'string',
enum: ['1d', '1w', '1m', '3m', '1y', 'all'],
},
include_recommendations: { type: 'boolean' },
},
required: ['period'],
},
},
];
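Note that functions and function_call are the legacy interface; newer SDK versions expose the same capability as tools and tool_choice. The definitions above can be reused without rewriting them (type name per openai-node v4; a sketch):
const tools: OpenAI.Chat.ChatCompletionTool[] = functions.map(fn => ({
  type: 'function' as const,
  function: fn, // reuses the same name/description/parameters schema
}));
// Then pass { tools, tool_choice: 'auto' } in place of { functions, function_call: 'auto' }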
// Function implementations
const functionHandlers: Record<string, (args: any) => Promise<any>> = {
get_stock_price: async ({ ticker, include_history }) => {
const price = await stockAPI.getPrice(ticker);
const result: any = { ticker, price, currency: 'USD', timestamp: new Date().toISOString() };
if (include_history) {
result.history = await stockAPI.getPriceHistory(ticker, 30);
}
return result;
},
execute_trade: async ({ ticker, action, quantity, order_type, limit_price }) => {
const order = await tradingAPI.createOrder({
ticker,
side: action,
quantity,
type: order_type,
limitPrice: limit_price,
});
return {
orderId: order.id,
status: order.status,
estimatedTotal: order.estimatedTotal,
};
},
analyze_portfolio: async ({ period, include_recommendations }) => {
const analysis = await portfolioAPI.analyze(period);
if (include_recommendations) {
analysis.recommendations = await portfolioAPI.getRecommendations();
}
return analysis;
},
};
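Function arguments arrive as model-generated JSON, so validate them before acting, especially for execute_trade. A sketch using zod (an assumed dependency; the schema mirrors the function definition above):
import { z } from 'zod';

const tradeArgsSchema = z.object({
  ticker: z.string().min(1),
  action: z.enum(['buy', 'sell']),
  quantity: z.number().positive(),
  order_type: z.enum(['market', 'limit']),
  limit_price: z.number().positive().optional(),
}).refine(
  (args) => args.order_type !== 'limit' || args.limit_price !== undefined,
  { message: 'limit_price is required for limit orders' }
);

// Throws with a descriptive error if the model produced bad arguments
const safeTradeArgs = (raw: unknown) => tradeArgsSchema.parse(raw);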
// Run conversation with function calling
async function runConversation(userMessage: string) {
const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
{
role: 'system',
content: `You are a helpful stock trading assistant. Help users check prices, analyze their portfolio, and execute trades. Always confirm before executing trades.`,
},
{ role: 'user', content: userMessage },
];
// Cap the number of rounds so a misbehaving model cannot loop forever
for (let round = 0; round < 10; round++) {
const response = await openai.chat.completions.create({
model: 'gpt-4-turbo-preview',
messages,
functions,
function_call: 'auto',
});
const message = response.choices[0].message;
// If no function call, return the response
if (!message.function_call) {
return message.content;
}
// Execute the function
const functionName = message.function_call.name;
const functionArgs = JSON.parse(message.function_call.arguments);
console.log(`Calling function: ${functionName}`, functionArgs);
const handler = functionHandlers[functionName];
if (!handler) {
throw new Error(`Unknown function: ${functionName}`);
}
const functionResult = await handler(functionArgs);
// Add assistant message and function result
messages.push(message);
messages.push({
role: 'function',
name: functionName,
content: JSON.stringify(functionResult),
});
}
throw new Error('Exceeded the maximum number of function-call rounds');
}
Streaming with Function Calls
Handle streaming responses that may include function calls:
async function* streamWithFunctions(
messages: OpenAI.Chat.ChatCompletionMessageParam[]
): AsyncGenerator<{ type: 'text' | 'function_call'; content: any }> {
const stream = await openai.chat.completions.create({
model: 'gpt-4-turbo-preview',
messages,
functions,
stream: true,
});
let functionName = '';
let functionArgs = '';
let currentText = '';
for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta;
if (delta?.content) {
currentText += delta.content;
yield { type: 'text', content: delta.content };
}
if (delta?.function_call) {
if (delta.function_call.name) {
functionName = delta.function_call.name;
}
if (delta.function_call.arguments) {
functionArgs += delta.function_call.arguments;
}
}
}
// If we collected a function call, yield it
if (functionName) {
yield {
type: 'function_call',
content: {
name: functionName,
arguments: JSON.parse(functionArgs),
},
};
}
}
// Usage with UI updates
async function handleUserMessage(userMessage: string) {
// Widen the type so assistant and function messages can be pushed later
const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
{ role: 'user', content: userMessage },
];
let fullResponse = '';
for await (const event of streamWithFunctions(messages)) {
if (event.type === 'text') {
fullResponse += event.content;
updateUI(fullResponse);
} else if (event.type === 'function_call') {
showFunctionCallIndicator(event.content.name);
const result = await functionHandlers[event.content.name](event.content.arguments);
// Continue conversation with function result
// function_call.arguments must be the raw JSON string, not the parsed object
messages.push({
role: 'assistant',
content: null,
function_call: { name: event.content.name, arguments: JSON.stringify(event.content.arguments) },
});
messages.push({ role: 'function', name: event.content.name, content: JSON.stringify(result) });
// Continue streaming with the function result (this sketch handles one level of nesting; nested function calls would need full recursion)
for await (const continueEvent of streamWithFunctions(messages)) {
if (continueEvent.type === 'text') {
fullResponse += continueEvent.content;
updateUI(fullResponse);
}
}
}
}
}
Embeddings and Semantic Search
Build powerful search systems with OpenAI embeddings:
import { createClient } from '@supabase/supabase-js';
const supabase = createClient(
process.env.SUPABASE_URL!,
process.env.SUPABASE_KEY!
);
interface DocumentChunk {
id: string;
content: string;
metadata: Record<string, any>;
embedding: number[];
}
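The DocumentChunk shape assumes source documents are already split into chunks. A minimal character-based chunker with overlap (a sketch; production pipelines usually split on sentence or token boundaries instead):
function chunkText(text: string, maxChars = 2000, overlap = 200): string[] {
  const chunks: string[] = [];
  let start = 0;
  while (start < text.length) {
    chunks.push(text.slice(start, start + maxChars));
    start += maxChars - overlap; // overlap preserves context across chunk boundaries
  }
  return chunks;
}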
// Generate embeddings in batches
async function generateEmbeddings(texts: string[]): Promise<number[][]> {
const batchSize = 100; // conservative; the embeddings API caps the number of inputs per request
const embeddings: number[][] = [];
for (let i = 0; i < texts.length; i += batchSize) {
const batch = texts.slice(i, i + batchSize);
const response = await openai.embeddings.create({
model: 'text-embedding-3-small',
input: batch,
dimensions: 1536, // Can reduce for cost savings
});
embeddings.push(...response.data.map(d => d.embedding));
}
return embeddings;
}
// Store documents with embeddings
async function indexDocuments(documents: { content: string; metadata: any }[]) {
const contents = documents.map(d => d.content);
const embeddings = await generateEmbeddings(contents);
const records = documents.map((doc, i) => ({
content: doc.content,
metadata: doc.metadata,
embedding: embeddings[i],
}));
const { error } = await supabase
.from('documents')
.insert(records);
if (error) throw error;
}
// Semantic search using pgvector
async function semanticSearch(
query: string,
options: { limit?: number; filter?: Record<string, any> } = {}
): Promise<DocumentChunk[]> {
const { limit = 5, filter } = options;
// Generate query embedding
const response = await openai.embeddings.create({
model: 'text-embedding-3-small',
input: query,
});
const queryEmbedding = response.data[0].embedding;
// Search with cosine similarity
let queryBuilder = supabase
.rpc('match_documents', {
query_embedding: queryEmbedding,
match_threshold: 0.7,
match_count: limit,
});
// Apply filters if provided
if (filter) {
for (const [key, value] of Object.entries(filter)) {
queryBuilder = queryBuilder.eq(`metadata->>${key}`, value);
}
}
const { data, error } = await queryBuilder;
if (error) throw error;
return data;
}
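Semantic search composes naturally with the chat helper from earlier for retrieval-augmented generation. A sketch reusing semanticSearch and chat as defined above:
async function answerWithContext(question: string): Promise<string> {
  const docs = await semanticSearch(question, { limit: 5 });
  // Number the retrieved chunks so answers can cite them
  const context = docs.map((d, i) => `[${i + 1}] ${d.content}`).join('\n\n');
  return chat([{ role: 'user', content: question }], {
    systemPrompt: `Answer using only the context below. If it is insufficient, say so.\n\nContext:\n${context}`,
    temperature: 0.2,
  });
}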
// Hybrid search combining keyword and semantic
async function hybridSearch(
query: string,
options: { limit?: number; keywordWeight?: number } = {}
): Promise<DocumentChunk[]> {
const { limit = 5, keywordWeight = 0.3 } = options;
// Get semantic results
const semanticResults = await semanticSearch(query, { limit: limit * 2 });
// Get keyword results
const { data: keywordResults } = await supabase
.from('documents')
.select()
.textSearch('content', query.split(' ').join(' | '))
.limit(limit * 2);
// Combine and score
const scoreMap = new Map<string, { doc: DocumentChunk; score: number }>();
semanticResults.forEach((doc, index) => {
const semanticScore = 1 - (index / semanticResults.length);
scoreMap.set(doc.id, { doc, score: semanticScore * (1 - keywordWeight) });
});
keywordResults?.forEach((doc, index) => {
const keywordScore = 1 - (index / keywordResults.length);
const existing = scoreMap.get(doc.id);
if (existing) {
existing.score += keywordScore * keywordWeight;
} else {
scoreMap.set(doc.id, { doc, score: keywordScore * keywordWeight });
}
});
// Sort by combined score
return Array.from(scoreMap.values())
.sort((a, b) => b.score - a.score)
.slice(0, limit)
.map(item => item.doc);
}
Vision Capabilities
GPT-4 Vision can analyze images alongside text:
async function analyzeImage(
imageUrl: string,
prompt: string
): Promise<string> {
const response = await openai.chat.completions.create({
model: 'gpt-4-vision-preview',
messages: [
{
role: 'user',
content: [
{ type: 'text', text: prompt },
{
type: 'image_url',
image_url: {
url: imageUrl,
detail: 'high', // or 'low' for faster/cheaper processing
},
},
],
},
],
max_tokens: 1000,
});
return response.choices[0]?.message?.content || '';
}
// Analyze multiple images
async function compareImages(
images: string[],
prompt: string
): Promise<string> {
const content: OpenAI.Chat.ChatCompletionContentPart[] = [
{ type: 'text', text: prompt },
...images.map(url => ({
type: 'image_url' as const,
image_url: { url, detail: 'low' as const },
})),
];
const response = await openai.chat.completions.create({
model: 'gpt-4-vision-preview',
messages: [{ role: 'user', content }],
max_tokens: 2000,
});
return response.choices[0]?.message?.content || '';
}
// Extract structured data from image
async function extractReceiptData(imageBase64: string) {
const response = await openai.chat.completions.create({
model: 'gpt-4-vision-preview',
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: `Extract the following from this receipt image and return as JSON:
- store_name
- date
- items (array of {name, quantity, price})
- subtotal
- tax
- total
Return only valid JSON.`,
},
{
type: 'image_url',
image_url: {
url: `data:image/jpeg;base64,${imageBase64}`,
detail: 'high',
},
},
],
},
],
max_tokens: 1000,
});
const content = response.choices[0]?.message?.content || '{}';
// The model may wrap JSON in markdown fences; strip them before parsing
const cleaned = content.replace(/^```(?:json)?\s*|```\s*$/g, '').trim();
return JSON.parse(cleaned);
}
Error Handling and Monitoring
Implement robust error handling for production:
import OpenAI from 'openai'; // error classes are exposed as OpenAI.APIError, OpenAI.RateLimitError, etc.
interface OpenAIMetrics {
requestCount: number;
tokenCount: number;
errorCount: number;
latencyMs: number;
}
const metrics: OpenAIMetrics = {
requestCount: 0,
tokenCount: 0,
errorCount: 0,
latencyMs: 0,
};
async function trackedRequest<T>(
operation: string,
fn: () => Promise<T>
): Promise<T> {
const startTime = Date.now();
metrics.requestCount++;
try {
const result = await fn();
metrics.latencyMs = Date.now() - startTime;
return result;
} catch (error) {
metrics.errorCount++;
if (error instanceof OpenAI.RateLimitError) {
console.error(`Rate limit hit for ${operation}`, {
retryAfter: error.headers?.['retry-after'],
});
// Back off for the interval the API suggests, then retry
const retryAfter = parseInt(error.headers?.['retry-after'] || '60', 10);
await new Promise(r => setTimeout(r, retryAfter * 1000));
return fn(); // Retry once
}
if (error instanceof OpenAI.APIError) {
console.error(`API error for ${operation}`, {
status: error.status,
message: error.message,
code: error.code,
});
// Handle specific error codes
switch (error.status) {
case 400:
throw new Error('Invalid request - check your parameters');
case 401:
throw new Error('Authentication failed - check API key');
case 403:
throw new Error('Access denied - check permissions');
case 404:
throw new Error('Resource not found');
case 429:
throw new Error('Rate limit exceeded');
case 500:
case 503:
// Server error - retry with backoff
await new Promise(r => setTimeout(r, 5000));
return fn();
default:
throw error;
}
}
throw error;
}
}
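The inline retries above handle single failures; for sustained production traffic, a generic exponential-backoff wrapper composes well with trackedRequest. A sketch (the attempt count and base delay are arbitrary starting points):
async function withBackoff<T>(
  fn: () => Promise<T>,
  maxAttempts = 5,
  baseMs = 500
): Promise<T> {
  let lastError: unknown;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      // Exponential delay with jitter: ~500ms, 1s, 2s, 4s...
      const delay = baseMs * 2 ** attempt + Math.random() * 250;
      await new Promise(r => setTimeout(r, delay));
    }
  }
  throw lastError;
}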
// Usage
const response = await trackedRequest('chat', () =>
openai.chat.completions.create({
model: 'gpt-4-turbo-preview',
messages: [{ role: 'user', content: 'Hello!' }],
})
);
Conclusion
OpenAI's GPT-4 provides powerful capabilities for building intelligent applications. By implementing proper function calling, embeddings, streaming, and error handling, you can create robust production systems.
Key takeaways:
- Use function calling for structured interactions
- Implement proper rate limiting and retry logic
- Leverage embeddings for semantic search
- Handle streaming for better UX
- Monitor usage and errors in production