package library

import (
	"database/sql"
	"path"
	"strconv"
	"sync"

	"velvettear/badger/internal/config"
	"velvettear/badger/internal/database"
	"velvettear/badger/internal/database/models"
	"velvettear/badger/internal/log"
	"velvettear/badger/internal/metadata"
	"velvettear/badger/internal/tools"
)

// exported function(s)

// FindDuplicates loads id, path, fingerprint and bitrate of every track from
// the database and compares the audio fingerprints pairwise to detect
// duplicates.
//
// The tracks are processed one after another: the first remaining track is
// removed from the list and compared against all tracks still in the list.
// Tracks identified as the "bad" side of a duplicate pair are dropped from the
// list so they are not compared again. Only the fingerprint comparisons for a
// single track are executed concurrently (bounded by config.Concurrency());
// the results are evaluated in list order, so the outcome does not depend on
// goroutine scheduling.
func FindDuplicates() {
	timestamp := tools.LogTimestamp()
	comparisonObjects, err := loadComparisonObjects()
	if err != nil {
		log.Error("encountered an error selecting all tracks as rows", err.Error())
		return
	}
	formatMismatch := config.DuplicatesFormatMismatch()
	concurrency := config.Concurrency()
	objectCount := len(comparisonObjects)
	log.Info("comparing "+strconv.Itoa(objectCount)+" audio fingerprints for duplicates...", "concurrency: "+strconv.Itoa(concurrency))
	var duplicates []Duplicate
	// the loop condition has to look at the shrinking list itself; a counter
	// that is never updated would run past the end of the slice
	for len(comparisonObjects) > 0 {
		object := comparisonObjects[0]
		comparisonObjects = comparisonObjects[1:]
		tmpTimestamp := tools.LogTimestamp()
		results := compareWithCandidates(&object, comparisonObjects, formatMismatch, concurrency)
		var duplicateIndices []int
		for index := range results {
			duplicate := results[index]
			if !duplicate.isValid() {
				continue
			}
			log.Debug("duplicate track detected", "id '"+strconv.Itoa(duplicate.id)+"', good file: "+duplicate.good+", bad file: "+duplicate.bad+", score: "+strconv.FormatFloat(duplicate.score, 'f', 2, 64))
			duplicates = append(duplicates, duplicate)
			if duplicate.id == object.id {
				// the current track itself is the bad file, stop comparing it
				break
			}
			duplicateIndices = append(duplicateIndices, index)
		}
		comparisonObjects = filterDuplicates(comparisonObjects, duplicateIndices)
		// every track no longer in the list has either been compared or filtered
		done := objectCount - len(comparisonObjects)
		log.DebugTimed("finished comparison of the audio fingerprint for track (id: '"+strconv.Itoa(object.id)+"')", tmpTimestamp, strconv.Itoa(done)+"/"+strconv.Itoa(objectCount))
	}
	log.InfoTimed("found "+strconv.Itoa(len(duplicates))+" duplicates", timestamp)
}

// unexported function(s)

// loadComparisonObjects selects all tracks as rows and converts them to
// comparison objects. Rows that can not be converted are skipped (the
// conversion logs the reason). The rows are closed before the function
// returns, so no result set is held open during the comparison.
func loadComparisonObjects() ([]comparisonObject, error) {
	rows, err := database.Connection().Model(&models.Track{}).Select("id", "path", "fingerprint", "bitrate").Rows()
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var objects []comparisonObject
	for rows.Next() {
		object, err := toComparisonObject(rows)
		if err != nil {
			continue
		}
		objects = append(objects, object)
	}
	// an iteration error would otherwise silently truncate the list
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return objects, nil
}

// compareWithCandidates compares 'object' against every entry of 'candidates'
// and returns one Duplicate per candidate (same index). Entries that are no
// duplicate, or that were skipped because of the format mismatch setting, hold
// the zero value and are therefore not valid.
//
// The work is spread over at most 'concurrency' goroutines (at least one).
// Each goroutine writes to its own distinct indices of the result slice, so no
// locking is required. The function returns after all goroutines finished.
func compareWithCandidates(object *comparisonObject, candidates []comparisonObject, formatMismatch bool, concurrency int) []Duplicate {
	results := make([]Duplicate, len(candidates))
	if len(candidates) == 0 {
		return results
	}
	if concurrency < 1 {
		concurrency = 1
	}
	if concurrency > len(candidates) {
		concurrency = len(candidates)
	}
	var objectFormat string
	if formatMismatch {
		objectFormat = path.Ext(object.path)
	}
	var waitGroup sync.WaitGroup
	for worker := 0; worker < concurrency; worker++ {
		waitGroup.Add(1)
		go func(start int) {
			defer waitGroup.Done()
			for index := start; index < len(candidates); index += concurrency {
				candidate := &candidates[index]
				// with format mismatch enabled only tracks with different
				// file extensions are compared
				if formatMismatch && objectFormat == path.Ext(candidate.path) {
					continue
				}
				results[index] = getDuplicate(object, candidate)
			}
		}(worker)
	}
	waitGroup.Wait()
	return results
}

// filterDuplicates returns 'objects' without the elements whose index is
// listed in 'duplicateIndices'. If either argument is empty the original slice
// is returned unchanged; otherwise a new slice is returned and 'objects' is
// left untouched.
func filterDuplicates(objects []comparisonObject, duplicateIndices []int) []comparisonObject {
	if len(objects) == 0 || len(duplicateIndices) == 0 {
		return objects
	}
	timestamp := tools.LogTimestamp()
	skip := make(map[int]struct{}, len(duplicateIndices))
	for _, index := range duplicateIndices {
		skip[index] = struct{}{}
	}
	removed := 0
	kept := make([]comparisonObject, 0, len(objects))
	for index := range objects {
		if _, found := skip[index]; found {
			removed++
			continue
		}
		kept = append(kept, objects[index])
	}
	log.DebugTimed("filtered "+strconv.Itoa(removed)+" duplicate track(s) from list", timestamp)
	return kept
}

// toComparisonObject scans the current row (columns: id, path, fingerprint,
// bitrate) and parses the fingerprint string. Errors are logged and returned;
// on error the returned object is the zero value.
func toComparisonObject(row *sql.Rows) (comparisonObject, error) {
	var object comparisonObject
	var rawFingerprint string
	if err := row.Scan(&object.id, &object.path, &rawFingerprint, &object.bitrate); err != nil {
		log.Error("encountered an error scanning a track row", err.Error())
		return comparisonObject{}, err
	}
	fingerprint, err := metadata.FingerprintFromString(rawFingerprint)
	if err != nil {
		log.Error("encountered an error parsing the audio fingerprint for file '"+object.path+"' from 'string' to '[]int32'", err.Error())
		return comparisonObject{}, err
	}
	object.fingerprint = fingerprint.Value
	return object, nil
}

// getDuplicate compares the fingerprints of both objects. If the score is
// below config.DuplicatesFingerprintThreshold() the zero value is returned.
// Otherwise the object with the strictly higher bitrate becomes the good file
// and the id of the returned Duplicate is the id of the bad file; with equal
// bitrates 'object' is treated as the bad file.
func getDuplicate(object *comparisonObject, other *comparisonObject) Duplicate {
	score := metadata.CompareWith(object.fingerprint, other.fingerprint)
	if score < config.DuplicatesFingerprintThreshold() {
		return Duplicate{}
	}
	var duplicate Duplicate
	if object.bitrate > other.bitrate {
		duplicate = Duplicate{id: other.id, good: object.path, bad: other.path}
	} else {
		duplicate = Duplicate{id: object.id, good: other.path, bad: object.path}
	}
	duplicate.score = score
	return duplicate
}

// isValid reports whether the duplicate has a positive id; the zero value
// returned for non-matching tracks is not valid.
func (duplicate *Duplicate) isValid() bool {
	return duplicate.id > 0
}

// struct(s)

// Duplicate describes a detected pair of duplicate tracks.
type Duplicate struct {
	id    int     // id of the track considered the bad file
	good  string  // path of the file to keep
	bad   string  // path of the file considered the duplicate
	score float64 // fingerprint comparison score of the pair
}

// comparisonObject holds the track data needed for the fingerprint comparison.
type comparisonObject struct {
	id          int
	path        string
	fingerprint []int32
	bitrate     int
}