Documentation
¶
Overview ¶
Package search provides code search and retrieval capabilities.
Index ¶
- Constants
- func EuclideanDistance(a, b Embedding) float64
- type Chunker
- type CodeDocument
- type CodeIndex
- type CodeSymbol
- type Document
- type DocumentChunk
- type Embedding
- type EmbeddingProvider
- type FixedSizeChunker
- type IndexConfig
- type IndexOption
- type SearchOptions
- type SearchResponse
- type SearchResult
- type SemanticIndex
- func (idx *SemanticIndex) Add(ctx context.Context, doc *Document) error
- func (idx *SemanticIndex) AddBatch(ctx context.Context, docs []*Document) error
- func (idx *SemanticIndex) Clear()
- func (idx *SemanticIndex) Count() int
- func (idx *SemanticIndex) Get(id string) *Document
- func (idx *SemanticIndex) HybridSearch(ctx context.Context, query string, topK int, keywordWeight float64) ([]*SearchResult, error)
- func (idx *SemanticIndex) KeywordSearch(query string, topK int) []*SearchResult
- func (idx *SemanticIndex) Remove(id string)
- func (idx *SemanticIndex) Search(ctx context.Context, query string, topK int) ([]*SearchResult, error)
- func (idx *SemanticIndex) SearchByEmbedding(query Embedding, topK int) []*SearchResult
- func (idx *SemanticIndex) SearchByEmbeddingWithOptions(query Embedding, opts *SearchOptions) *SearchResponse
- func (idx *SemanticIndex) SearchWithOptions(ctx context.Context, query string, opts *SearchOptions) (*SearchResponse, error)
- type SentenceChunker
Constants ¶
const (
	// DefaultChunkSize is the default character count for fixed-size chunking.
	DefaultChunkSize = 512
	// DefaultChunkOverlap is the default overlap between adjacent chunks.
	DefaultChunkOverlap = 64
	// DefaultMaxChunks is the maximum number of chunks per document.
	DefaultMaxChunks = 100
)
Chunking defaults for code index.
Variables ¶
This section is empty.
Functions ¶
func EuclideanDistance ¶
func EuclideanDistance(a, b Embedding) float64
EuclideanDistance calculates Euclidean distance between two vectors.
Types ¶
type Chunker ¶
type Chunker interface {
Chunk(ctx context.Context, doc *Document) []*DocumentChunk
}
Chunker splits documents into chunks.
type CodeDocument ¶
type CodeDocument struct {
*Document
FilePath string `json:"file_path"`
Language string `json:"language"`
Symbols []CodeSymbol `json:"symbols,omitempty"`
}
CodeDocument represents a code file for indexing.
type CodeIndex ¶
type CodeIndex struct {
*SemanticIndex
// contains filtered or unexported fields
}
CodeIndex indexes code files for semantic search.
func NewCodeIndex ¶
func NewCodeIndex(embedder EmbeddingProvider, opts ...IndexOption) *CodeIndex
NewCodeIndex creates a new code index.
func (*CodeIndex) SearchCode ¶
func (idx *CodeIndex) SearchCode(ctx context.Context, query string, topK int, language string) ([]*SearchResult, error)
SearchCode searches code with optional language filter.
type CodeSymbol ¶
type CodeSymbol struct {
Name string `json:"name"`
Kind string `json:"kind"` // "function", "class", "method", etc.
StartLine int `json:"start_line"`
EndLine int `json:"end_line"`
Signature string `json:"signature,omitempty"`
}
CodeSymbol represents a code symbol (function, class, etc.).
type Document ¶
type Document struct {
ID string `json:"id"`
Content string `json:"content"`
Metadata map[string]any `json:"metadata,omitempty"`
Embedding Embedding `json:"embedding,omitempty"`
Chunks []*DocumentChunk `json:"chunks,omitempty"`
}
Document represents a searchable document.
type DocumentChunk ¶
type DocumentChunk struct {
ID string `json:"id"`
DocumentID string `json:"document_id"`
Content string `json:"content"`
StartPos int `json:"start_pos"`
EndPos int `json:"end_pos"`
Embedding Embedding `json:"embedding,omitempty"`
}
DocumentChunk represents a chunk of a document.
type EmbeddingProvider ¶
type EmbeddingProvider interface {
// Embed generates an embedding for the given text.
Embed(ctx context.Context, text string) (Embedding, error)
// EmbedBatch generates embeddings for multiple texts.
EmbedBatch(ctx context.Context, texts []string) ([]Embedding, error)
// Dimension returns the embedding dimension.
Dimension() int
}
EmbeddingProvider generates embeddings for text.
type FixedSizeChunker ¶
type FixedSizeChunker struct {
Size int
Overlap int
MaxChunks int // Maximum chunks per document (0 = unlimited)
}
FixedSizeChunker splits by character count.
func NewFixedSizeChunker ¶
func NewFixedSizeChunker(size, overlap int) *FixedSizeChunker
NewFixedSizeChunker creates a fixed size chunker with default max chunks limit. Overlap must be less than Size to ensure forward progress; values are clamped if invalid.
func NewFixedSizeChunkerWithLimit ¶ added in v0.3.0
func NewFixedSizeChunkerWithLimit(size, overlap, maxChunks int) *FixedSizeChunker
NewFixedSizeChunkerWithLimit creates a fixed size chunker with configurable max chunks. Overlap must be less than Size to ensure forward progress; values are clamped if invalid. maxChunks of 0 means unlimited.
func (*FixedSizeChunker) Chunk ¶
func (c *FixedSizeChunker) Chunk(ctx context.Context, doc *Document) []*DocumentChunk
Chunk splits a document into fixed-size chunks. If MaxChunks is set and exceeded, chunks are truncated to preserve the first N.
type IndexConfig ¶
IndexConfig configures the semantic index.
func DefaultIndexConfig ¶
func DefaultIndexConfig() *IndexConfig
DefaultIndexConfig returns sensible defaults.
type IndexOption ¶
type IndexOption func(*SemanticIndex)
IndexOption configures a SemanticIndex.
func WithMaxEntries ¶
func WithMaxEntries(max int) IndexOption
WithMaxEntries sets the maximum number of documents before LRU eviction. Default is 0 (unlimited). Recommended: 100000 for memory-constrained environments.
func WithRateLimiter ¶ added in v0.3.0
func WithRateLimiter(limit rate.Limit, burst int) IndexOption
WithRateLimiter sets rate limiting for embedding API calls. limit is requests per second (e.g., rate.Limit(100.0/60.0) for 100 req/min). burst is the maximum burst size (typically 1-10). Default is nil (no rate limiting).
type SearchOptions ¶
type SearchOptions struct {
// Offset skips the first N results (for pagination).
Offset int
// Limit caps the number of returned results.
// If zero, uses the topK parameter as limit.
Limit int
}
SearchOptions configures search behavior.
type SearchResponse ¶
type SearchResponse struct {
Results []*SearchResult `json:"results"`
TotalCount int `json:"total_count"`
Offset int `json:"offset"`
Limit int `json:"limit"`
}
SearchResponse wraps search results with pagination info.
type SearchResult ¶
type SearchResult struct {
Document *Document `json:"document"`
Chunk *DocumentChunk `json:"chunk,omitempty"`
Score float64 `json:"score"`
Highlights []string `json:"highlights,omitempty"`
}
SearchResult represents a search result.
type SemanticIndex ¶
type SemanticIndex struct {
// contains filtered or unexported fields
}
SemanticIndex stores documents with embeddings for search.
func NewSemanticIndex ¶
func NewSemanticIndex(embedder EmbeddingProvider, chunker Chunker, opts ...IndexOption) *SemanticIndex
NewSemanticIndex creates a new semantic index.
func (*SemanticIndex) Add ¶
func (idx *SemanticIndex) Add(ctx context.Context, doc *Document) error
Add adds a document to the index.
func (*SemanticIndex) AddBatch ¶
func (idx *SemanticIndex) AddBatch(ctx context.Context, docs []*Document) error
AddBatch adds multiple documents.
func (*SemanticIndex) Count ¶
func (idx *SemanticIndex) Count() int
Count returns the number of indexed documents.
func (*SemanticIndex) Get ¶
func (idx *SemanticIndex) Get(id string) *Document
Get retrieves a document by ID and updates its LRU position.
func (*SemanticIndex) HybridSearch ¶
func (idx *SemanticIndex) HybridSearch(ctx context.Context, query string, topK int, keywordWeight float64) ([]*SearchResult, error)
HybridSearch combines semantic and keyword search.
func (*SemanticIndex) KeywordSearch ¶
func (idx *SemanticIndex) KeywordSearch(query string, topK int) []*SearchResult
KeywordSearch performs basic keyword matching.
func (*SemanticIndex) Remove ¶
func (idx *SemanticIndex) Remove(id string)
Remove removes a document from the index.
func (*SemanticIndex) Search ¶
func (idx *SemanticIndex) Search(ctx context.Context, query string, topK int) ([]*SearchResult, error)
Search performs semantic search.
func (*SemanticIndex) SearchByEmbedding ¶
func (idx *SemanticIndex) SearchByEmbedding(query Embedding, topK int) []*SearchResult
SearchByEmbedding searches by embedding vector.
func (*SemanticIndex) SearchByEmbeddingWithOptions ¶
func (idx *SemanticIndex) SearchByEmbeddingWithOptions(query Embedding, opts *SearchOptions) *SearchResponse
SearchByEmbeddingWithOptions searches by embedding vector with pagination.
func (*SemanticIndex) SearchWithOptions ¶
func (idx *SemanticIndex) SearchWithOptions(ctx context.Context, query string, opts *SearchOptions) (*SearchResponse, error)
SearchWithOptions performs semantic search with pagination.
type SentenceChunker ¶
type SentenceChunker struct {
MaxSentences int
Overlap int
MaxChunks int // Maximum chunks per document (0 = unlimited)
}
SentenceChunker splits by sentences.
func NewSentenceChunker ¶
func NewSentenceChunker(maxSentences, overlap int) *SentenceChunker
NewSentenceChunker creates a sentence-based chunker with default max chunks limit. Overlap must be less than MaxSentences to ensure forward progress; values are clamped if invalid.
func NewSentenceChunkerWithLimit ¶ added in v0.3.0
func NewSentenceChunkerWithLimit(maxSentences, overlap, maxChunks int) *SentenceChunker
NewSentenceChunkerWithLimit creates a sentence-based chunker with configurable max chunks. Overlap must be less than MaxSentences to ensure forward progress; values are clamped if invalid. maxChunks of 0 means unlimited.
func (*SentenceChunker) Chunk ¶
func (c *SentenceChunker) Chunk(ctx context.Context, doc *Document) []*DocumentChunk
Chunk splits a document by sentences. If MaxChunks is set and exceeded, chunks are truncated to preserve the first N.