How would you implement a vector search with similarity scoring?
#gen-ai #vector-db #embeddings
Answer
Implementing Vector Search with Similarity Scoring
Vector search finds the most semantically similar documents to a query by computing distances between embedding vectors.
Core Similarity Metrics
Cosine similarity — the cosine of the angle between two vectors (range: -1 to 1), $\cos(a, b) = \frac{a \cdot b}{\lVert a \rVert \, \lVert b \rVert}$. Best for text.
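Dot product — the product of the two vectors' magnitudes × their cosine similarity. Faster to compute, but it equals cosine similarity only when the vectors are normalised to unit length.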
Euclidean distance (L2) — straight-line distance in vector space. Less common for text.
Implementation from Scratch
import numpy as np
from typing import NamedTuple


class SearchResult(NamedTuple):
    """One search hit: the matched document and its similarity score."""

    id: str  # value of the document's "id" field
    text: str  # value of the document's "text" field
    score: float  # similarity between the query and this document


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of two vectors.

    A zero-length vector has no direction, so its similarity to anything is
    reported as 0.0 instead of the NaN a 0/0 division would produce.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return float(np.dot(a, b) / denominator)


def vector_search(
    query_embedding: np.ndarray,
    document_embeddings: np.ndarray,
    documents: list[dict],
    top_k: int = 5,
    min_score: float = 0.7,
) -> list[SearchResult]:
    """Search documents by cosine similarity to the query embedding.

    Args:
        query_embedding: Query vector of shape (D,).
        document_embeddings: Matrix of shape (N, D); row i is the embedding
            of ``documents[i]``.
        documents: Dicts providing at least the "id" and "text" keys.
        top_k: Maximum number of results to return.
        min_score: Minimum cosine similarity a result must reach.

    Returns:
        At most ``top_k`` results, best match first, each scoring at least
        ``min_score``. The threshold is applied after the top-k cut, so
        fewer than ``top_k`` results may come back.
    """
    query_embedding = np.asarray(query_embedding)
    document_embeddings = np.asarray(document_embeddings)
    if top_k <= 0 or document_embeddings.size == 0:
        return []

    # Zero-length vectors would turn into NaN when normalised. NaN sorts to
    # the *front* of the reversed argsort below, so it would occupy top-k
    # slots and push real matches out. Dividing by 1 instead keeps such
    # vectors at zero, which gives them a similarity of exactly 0.
    query_length = np.linalg.norm(query_embedding)
    if query_length == 0:
        query_length = 1.0
    doc_lengths = np.linalg.norm(document_embeddings, axis=1, keepdims=True)
    doc_lengths[doc_lengths == 0] = 1.0

    # One matrix-vector product yields all N cosine similarities at once.
    scores = (document_embeddings / doc_lengths) @ (query_embedding / query_length)

    # Indices of the top_k highest scores, best first.
    ranked = np.argsort(scores)[::-1][:top_k]
    return [
        SearchResult(
            id=documents[i]["id"],
            text=documents[i]["text"],
            score=float(scores[i]),
        )
        for i in ranked
        if scores[i] >= min_score
    ]
Production Implementation with FAISS
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer


class VectorStore:
    """In-memory semantic index: sentence embeddings stored in a FAISS index."""

    def __init__(self, embedding_dim: int = 384):
        """Load the embedding model and create an empty index.

        Args:
            embedding_dim: Dimensionality of the index. It has to match the
                size of the vectors the model produces, otherwise adding
                vectors will fail.
        """
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        self.embedding_dim = embedding_dim

        # IndexFlatIP = exact inner-product search. Embeddings are normalised
        # at encode time, so the inner product is the cosine similarity.
        self.index = faiss.IndexFlatIP(embedding_dim)

        # For large collections (100K+ documents) an approximate IVF index is
        # faster. It must be given the inner-product metric explicitly (its
        # default is L2) and trained before any vectors are added:
        #   quantizer = faiss.IndexFlatIP(embedding_dim)
        #   self.index = faiss.IndexIVFFlat(
        #       quantizer, embedding_dim, 100, faiss.METRIC_INNER_PRODUCT
        #   )  # 100 = nlist, the number of Voronoi cells
        #   self.index.train(training_embeddings)

        # documents[i] is the document whose vector sits at index position i.
        self.documents: list[dict] = []

    def add_documents(self, documents: list[dict]) -> None:
        """Embed each document's "text" and append it to the index.

        Args:
            documents: Dicts with a "text" key and, optionally, an "id" key.
        """
        if not documents:
            # Nothing to embed; avoids handing an empty batch to the index.
            return

        texts = [doc["text"] for doc in documents]
        embeddings = self.model.encode(texts, normalize_embeddings=True)
        embeddings = embeddings.astype(np.float32)  # FAISS expects float32

        self.index.add(embeddings)
        self.documents.extend(documents)
        print(f"Index now contains {self.index.ntotal} vectors")

    def search(self, query: str, top_k: int = 5) -> list[SearchResult]:
        """Return up to ``top_k`` documents most similar to ``query``.

        Args:
            query: Free-text query; embedded with the same model as the
                documents.
            top_k: Maximum number of results.

        Returns:
            Results in the order returned by the index. A document without
            an "id" key gets its index position (as a string) as id.
        """
        # Never ask for more neighbours than the index holds, and skip the
        # model call entirely when there is nothing to return.
        top_k = min(top_k, self.index.ntotal)
        if top_k <= 0:
            return []

        query_embedding = self.model.encode(
            [query], normalize_embeddings=True
        ).astype(np.float32)

        scores, indices = self.index.search(query_embedding, top_k)

        results = []
        for score, idx in zip(scores[0], indices[0]):
            if idx != -1:  # -1 means no result found
                results.append(SearchResult(
                    id=self.documents[idx].get("id", str(idx)),
                    text=self.documents[idx]["text"],
                    score=float(score),
                ))
        return results


# Usage
store = VectorStore()
store.add_documents([
    {"id": "1", "text": "RAG retrieves documents to ground LLM answers"},
    {"id": "2", "text": "Fine-tuning updates model weights on domain data"},
    {"id": "3", "text": "The weather in London is often rainy"},
])

results = store.search("How does retrieval-augmented generation work?", top_k=2)
for r in results:
    print(f"Score: {r.score:.4f} | {r.text}")
# Score: 0.8912 | RAG retrieves documents to ground LLM answers
# Score: 0.4231 | Fine-tuning updates model weights on domain data
Hybrid Search (Keyword + Semantic)
import numpy as np
from rank_bm25 import BM25Okapi


class HybridSearcher:
    """Blend BM25 keyword relevance with semantic similarity."""

    def __init__(self, documents: list[dict], alpha: float = 0.5):
        """Build the keyword and semantic indexes over ``documents``.

        Args:
            documents: Dicts with a "text" key and, optionally, an "id" key.
                Ids are expected to be unique; if one repeats, the semantic
                score is attributed to its first occurrence.
            alpha: Blend weight. alpha=0: pure BM25, alpha=1: pure semantic.
        """
        self.alpha = alpha
        texts = [doc["text"] for doc in documents]

        # BM25 index over lower-cased, whitespace-split tokens
        tokenized = [t.lower().split() for t in texts]
        self.bm25 = BM25Okapi(tokenized)

        # Semantic index
        self.vector_store = VectorStore()
        self.vector_store.add_documents(documents)

        # Semantic results identify documents by id, while the score arrays
        # are indexed by position, so keep an id -> position lookup. The
        # fallback key mirrors the id the vector store reports for a
        # document that has no "id" field (its position as a string).
        self._positions: dict = {}
        for position, doc in enumerate(documents):
            self._positions.setdefault(doc.get("id", str(position)), position)

    def search(self, query: str, top_k: int = 5) -> list[dict]:
        """Rank documents by a weighted blend of BM25 and semantic scores.

        Args:
            query: Free-text query.
            top_k: Maximum number of results.

        Returns:
            Dicts with "idx" (position of the document in the list passed to
            the constructor) and "score" (blended score), best first.
        """
        # BM25 scores (keyword relevance), scaled by the best score; the
        # epsilon avoids dividing by zero when no query term matches.
        bm25_scores = self.bm25.get_scores(query.lower().split())
        bm25_norm = bm25_scores / (bm25_scores.max() + 1e-8)

        # Semantic scores, placed at each document's position. Documents
        # the vector store does not return keep a score of 0.
        semantic_results = self.vector_store.search(query, top_k=len(bm25_scores))
        semantic_scores = np.zeros(len(bm25_scores))
        for r in semantic_results:
            semantic_scores[self._positions[r.id]] = r.score

        # Weighted combination (linear blend of the two score arrays)
        combined = (1 - self.alpha) * bm25_norm + self.alpha * semantic_scores
        top_indices = np.argsort(combined)[::-1][:top_k]

        return [{"idx": int(i), "score": float(combined[i])} for i in top_indices]
Score interpretation: As a rough guide, a cosine similarity of > 0.8 = very similar, 0.5–0.8 = related, < 0.5 = probably not relevant. Set your
`min_score` threshold based on your precision/recall trade-off.