bins

package
v0.0.0-...-8045c00 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 15, 2026 License: MIT Imports: 26 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BasicReRank

func BasicReRank(results map[string][]string, config globals.Args) map[string][]string

Takes a mapping from QID to DOCIDs and loads the query text and document text, then re-ranks all the docIDs using the BM25 search. Returns a mapping with only the top-k (from config) documents.

func BuildBlugeIndexFromJSONL

func BuildBlugeIndexFromJSONL(jsonlPath, indexDir string) error

func DecodeEntryToVectors

func DecodeEntryToVectors(entry []uint64, Dim int) ([][]float32, error)

func FilterJSONLByIDs

func FilterJSONLByIDs(inputPath, outputPath string, docIDs []string) error

Takes two paths and a list of docIDs/qIDs, selects the matching elements from inputPath, and writes ONLY those elements to outputPath.

func HashFloat32s

func HashFloat32s(xs []float32) [32]byte

func LoadCorpus

func LoadCorpus(path string) ([]beirDoc, error)

func LoadQrels

func LoadQrels(path string) (qrels, error)

func LoadQueries

func LoadQueries(query_path string) ([]globals.Query, error)

func MakeLookup

func MakeLookup(meta globals.DatasetMetadata, dbsize, dimensions int) map[[32]byte]string

func MakeUnigramDB

func MakeUnigramDB(reader *bluge.Reader, dataset globals.DatasetMetadata, config globals.Args) [][]string

TODO: Replace bluge.Reader with a generic implementation.

func Must

func Must(err error)

func PirPreprocessAndLoadData

func PirPreprocessAndLoadData(idxPath string) [][]uint64

func ReadCSV

func ReadCSV(path string) ([][]string, error)

func StringsToUint64Grid

func StringsToUint64Grid(strs []string) ([][]uint64, int, error)

StringsToUint64Grid encodes []string -> [][]uint64. Strings are easy to convert into bytes but awkward to handle as arrays of uint64, so multiple bytes are packed into each uint64 instead of casting each byte to its own uint64. I'm not sure whether this is an easier or harder solution... Each row: [ length | packed bytes ... | zero padding ... ]

func Uint64GridToStrings

func Uint64GridToStrings(grid [][]uint64) ([]string, error)

Uint64GridToStrings decodes [][]uint64 -> []string.

func WriteCSV

func WriteCSV(path string, data [][]string) error

WriteCSV writes a [][]string as CSV.

Types

type Config

type Config struct {
	K         uint
	D         uint
	MaxBins   uint
	Filenames bool
	Threshold uint
}

type DBentry

type DBentry struct {
	// contains filtered or unexported fields
}

func (DBentry) Decode

func (d DBentry) Decode(config globals.Args) []string

type VecBins

type VecBins struct {
	N                    int                      // Number of Bins
	Dimensions           int                      // Dimension of vectors
	EntrySize            int                      // number of vectors in a row (Size of one entry)
	DBEntrySize          uint64                   // Number of bytes in an entry
	DBTotalSize          uint64                   // in bytes
	Queries              map[string]globals.Query // A mapping from QID to query
	EnglishTokenAnalyzer *analysis.Analyzer
	PIR                  *pianopir.SimpleBatchPianoPIR
	MaxRowSize           uint
	// contains filtered or unexported fields
}

func MakeVecDb

func MakeVecDb(config globals.Args) VecBins

MakeVecDb takes in args from the command line and outputs a 'VecBins' object that implements the functions required for binsDB.

func ProcessVecDB

func ProcessVecDB(config globals.Args, maxRowSize uint, vectorsInBins [][][]float32) VecBins

func (VecBins) DoSearch

func (v VecBins) DoSearch(QID string, _ int) (globals.Decodable, error)

func (VecBins) GetBatchNums

func (v VecBins) GetBatchNums() (uint64, uint64, uint64)

func (VecBins) GetMetaData

func (v VecBins) GetMetaData() map[string]string

func (VecBins) MakeIndices

func (v VecBins) MakeIndices(QID string) []uint64

func (VecBins) PIRPreprocess

func (v VecBins) PIRPreprocess() time.Duration

func (VecBins) Preprocess

func (v VecBins) Preprocess()

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL