Documentation
¶
Index ¶
- func BasicReRank(results map[string][]string, config globals.Args) map[string][]string
- func BuildBlugeIndexFromJSONL(jsonlPath, indexDir string) error
- func DecodeEntryToVectors(entry []uint64, Dim int) ([][]float32, error)
- func FilterJSONLByIDs(inputPath, outputPath string, docIDs []string) error
- func HashFloat32s(xs []float32) [32]byte
- func LoadCorpus(path string) ([]beirDoc, error)
- func LoadQrels(path string) (qrels, error)
- func LoadQueries(query_path string) ([]globals.Query, error)
- func MakeLookup(meta globals.DatasetMetadata, dbsize, dimensions int) map[[32]byte]string
- func MakeUnigramDB(reader *bluge.Reader, dataset globals.DatasetMetadata, config globals.Args) [][]string
- func Must(err error)
- func PirPreprocessAndLoadData(idxPath string) [][]uint64
- func ReadCSV(path string) ([][]string, error)
- func StringsToUint64Grid(strs []string) ([][]uint64, int, error)
- func Uint64GridToStrings(grid [][]uint64) ([]string, error)
- func WriteCSV(path string, data [][]string) error
- type Config
- type DBentry
- type VecBins
- func (v VecBins) DoSearch(QID string, _ int) (globals.Decodable, error)
- func (v VecBins) GetBatchNums() (uint64, uint64, uint64)
- func (v VecBins) GetMetaData() map[string]string
- func (v VecBins) MakeIndices(QID string) []uint64
- func (v VecBins) PIRPreprocess() time.Duration
- func (v VecBins) Preprocess()
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func BasicReRank ¶
Takes in a mapping from each QID to its DOCIDs and loads the query text and document text. It then re-ranks all the docIDs based on the BM25 search and returns a mapping containing only the top-k (from config) documents.
func FilterJSONLByIDs ¶
Takes in two paths and a list of docIDs/qIDs, selects the matching elements from inputPath, and writes ONLY those elements to outputPath.
func HashFloat32s ¶
func LoadCorpus ¶
func MakeLookup ¶
func MakeLookup(meta globals.DatasetMetadata, dbsize, dimensions int) map[[32]byte]string
func MakeUnigramDB ¶
func MakeUnigramDB(reader *bluge.Reader, dataset globals.DatasetMetadata, config globals.Args) [][]string
TODO: Replace bluge.Reader with a generic implementation.
func StringsToUint64Grid ¶
StringsToUint64Grid encodes []string -> [][]uint64. Strings are easy to convert into bytes but awkward to handle as arrays of uint64; as a result, I pack multiple bytes into each uint64 instead of casting each byte to its own uint64. I'm not sure if this is an easier or harder solution... Each row: [ length | packed bytes ... | zero padding ... ]
func Uint64GridToStrings ¶
Uint64GridToStrings decodes [][]uint64 -> []string.
Types ¶
type VecBins ¶
type VecBins struct {
N int // Number of Bins
Dimensions int // Dimension of vectors
EntrySize int // number of vectors in a row (Size of one entry)
DBEntrySize uint64 // Number of bytes in an entry
DBTotalSize uint64 // in bytes
Queries map[string]globals.Query // A mapping from QID to query
EnglishTokenAnalyzer *analysis.Analyzer
PIR *pianopir.SimpleBatchPianoPIR
MaxRowSize uint
// contains filtered or unexported fields
}
func MakeVecDb ¶
MakeVecDb takes in args from the command line and outputs a 'VecBins' object that implements the functions required for binsDB.
func ProcessVecDB ¶
func (VecBins) GetMetaData ¶
func (VecBins) MakeIndices ¶
func (VecBins) PIRPreprocess ¶
func (VecBins) Preprocess ¶
func (v VecBins) Preprocess()