Vector Databases

Below is a table of the popular vector search solutions:

Tool Free Tier Pricing Model Docs
FAISS N/A, self-host Open-source Faiss.ai
Pinecone 2GB free From $25/mo pinecone.io
Qdrant 1GB free cloud Pay-as-you-go qdrant.tech
Weaviate 14-day sandbox From $25/mo weaviate.io
Milvus 5GB free cloud PAYG or $99/mo dedicated milvus.io
Chroma N/A, self-host Free (Apache 2.0) trychroma.com
Redis 30MB free From $5/mo redis.io

Example Python Code

Below are basic usage snippets for each tool.

FAISS

import faiss
import numpy as np

# Dimensionality of embeddings
d = 128

# Create a flat L2 index
index = faiss.IndexFlatL2(d)

# Random vectors
data = np.random.random((1000, d)).astype('float32')
index.add(data)

# Query
query = np.random.random((1, d)).astype('float32')
D, I = index.search(query, k=5)

print("Distances:", D)
print("Neighbors:", I)

Pinecone

import pinecone

pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENV")

index_name = "my-index"

# Create the index if it doesn't exist
if index_name not in pinecone.list_indexes():
    pinecone.create_index(name=index_name, dimension=128)

# Connect
index = pinecone.Index(index_name)

# Upsert
vectors = [
    ("id1", [0.1]*128),
    ("id2", [0.2]*128)
]
index.upsert(vectors)

# Query
response = index.query([[0.15]*128], top_k=3)
print(response)

Qdrant

import qdrant_client
from qdrant_client.models import Distance, VectorParams, PointStruct

client = qdrant_client.QdrantClient(
    url="https://YOUR-QDRANT-CLOUD-ENDPOINT",
    api_key="YOUR_API_KEY"
)

collection = "my_collection"
client.recreate_collection(
    collection_name=collection,
    vectors_config=VectorParams(size=128, distance=Distance.COSINE)
)

points = [
    PointStruct(id=1, vector=[0.1]*128, payload={"type": "doc1"}),
    PointStruct(id=2, vector=[0.2]*128, payload={"type": "doc2"}),
]

client.upsert(collection_name=collection, points=points)

results = client.search(
    collection_name=collection,
    query_vector=[0.15]*128,
    limit=2
)
print(results)

Weaviate

import weaviate

client = weaviate.Client("https://YOUR-WEAVIATE-CLOUD-ENDPOINT")

schema = {
    "classes": [
        {
            "class": "Article",
            "vectorizer": "none"
        }
    ]
}
client.schema.create(schema)

obj = {
    "title": "Hello World",
    "content": "Weaviate vector search"
}
client.data_object.create(obj, "Article", vector=[0.1]*128)

resp = (
    client.query
    .get("Article", ["title", "content"])
    .with_near_vector({"vector": [0.15]*128})
    .with_limit(3)
    .do()
)
print(resp)

Milvus

from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection
import numpy as np

connections.connect(alias="default", host="localhost", port="19530")

fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128)
]
schema = CollectionSchema(fields)
collection = Collection("MyCollection", schema)

emb = np.random.rand(10, 128).astype('float32')
ids = list(range(10))
collection.insert([ids, emb])

index_params = {
    "index_type": "IVF_FLAT",
    "params": {"nlist": 128},
    "metric_type": "L2"
}
collection.create_index("embedding", index_params)
collection.load()

query_emb = np.random.rand(1, 128).astype('float32')
results = collection.search(query_emb, "embedding", param={"nprobe": 10}, limit=3)
print(results)

Chroma

import chromadb
from chromadb.config import Settings

client = chromadb.Client(Settings(
    chroma_db_impl="duckdb+parquet",
    persist_directory="./chroma_data"
))

coll = client.create_collection("my_collection")

vectors = [[0.1, 0.2, 0.3], [0.2, 0.2, 0.2]]
metas = [{"doc": "text1"}, {"doc": "text2"}]
ids = ["id1", "id2"]
coll.add(embeddings=vectors, metadatas=metas, ids=ids)

res = coll.query(query_embeddings=[[0.15, 0.25, 0.3]], n_results=2)
print(res)

Redis

import redis
import struct

r = redis.Redis(host="localhost", port=6379)

# Create index
r.execute_command(
    "FT.CREATE", "my_idx", "ON", "HASH",
    "SCHEMA", "embedding", "VECTOR", "FLAT", "6",
    "TYPE", "FLOAT32", "DIM", "128",
    "DISTANCE_METRIC", "L2"
)

# Insert
vec = struct.pack('128f', *[0.1]*128)
r.hset("doc1", mapping={"embedding": vec})

# Search
qvec = struct.pack('128f', *[0.15]*128)
q = "*=>[KNN 3 @embedding $BLOB AS dist]"
res = r.ft("my_idx").search(q, query_params={"BLOB": qvec})
print(res.docs)