145 lines
4.4 KiB
Go
145 lines
4.4 KiB
Go
package library
|
|
|
|
import (
	"database/sql"
	"path"
	"strconv"
	"sync"

	"velvettear/badger/internal/config"
	"velvettear/badger/internal/database"
	"velvettear/badger/internal/database/models"
	"velvettear/badger/internal/log"
	"velvettear/badger/internal/metadata"
	"velvettear/badger/internal/tools"
)
|
|
|
|
// exported function(s)
|
|
func FindDuplicates() {
|
|
timestamp := tools.LogTimestamp()
|
|
var duplicates []Duplicate
|
|
rows, error := database.Connection().Model(&models.Track{}).Select("id", "path", "fingerprint", "bitrate").Rows()
|
|
if error != nil {
|
|
log.Error("encountered an error selecting all tracks as rows", error.Error())
|
|
return
|
|
}
|
|
defer rows.Close()
|
|
formatMismatch := config.DuplicatesFormatMismatch()
|
|
var comparisonObjects []comparisonObject
|
|
for rows.Next() {
|
|
comparisonObject, error := toComparisonObject(rows)
|
|
if error != nil {
|
|
continue
|
|
}
|
|
comparisonObjects = append(comparisonObjects, comparisonObject)
|
|
}
|
|
waitChannel := make(chan struct{}, config.Concurrency())
|
|
var objectFormat string
|
|
var duplicateIndices []int
|
|
done := 0
|
|
objectCount := len(comparisonObjects)
|
|
log.Info("comparing "+strconv.Itoa(objectCount)+" audio fingerprints for duplicates...", "concurrency: "+strconv.Itoa(config.Concurrency()))
|
|
for objectCount > 0 {
|
|
object := comparisonObjects[0]
|
|
comparisonObjects = comparisonObjects[1:]
|
|
if formatMismatch {
|
|
objectFormat = path.Ext(object.path)
|
|
}
|
|
waitChannel <- struct{}{}
|
|
go func(object comparisonObject) {
|
|
tmpTimestamp := tools.LogTimestamp()
|
|
for index, comparisonObject := range comparisonObjects {
|
|
if formatMismatch && objectFormat == path.Ext(comparisonObject.path) {
|
|
continue
|
|
}
|
|
duplicate := getDuplicate(&object, &comparisonObject)
|
|
if !duplicate.isValid() {
|
|
continue
|
|
}
|
|
log.Debug("duplicate track detected", "id '"+strconv.Itoa(duplicate.id)+"', good file: "+duplicate.good+", bad file: "+duplicate.bad+", score: "+strconv.FormatFloat(duplicate.score, 'f', 2, 64))
|
|
duplicates = append(duplicates, duplicate)
|
|
if duplicate.id == object.id {
|
|
break
|
|
}
|
|
duplicateIndices = append(duplicateIndices, index)
|
|
}
|
|
done++
|
|
objectCount := len(comparisonObjects)
|
|
log.DebugTimed("finished comparison of the audio fingerprint for track (id: '"+strconv.Itoa(object.id)+"')", tmpTimestamp, strconv.Itoa(done)+"/"+strconv.Itoa(objectCount))
|
|
<-waitChannel
|
|
}(object)
|
|
comparisonObjects = filterDuplicates(comparisonObjects, duplicateIndices)
|
|
duplicateIndices = nil
|
|
}
|
|
log.InfoTimed("found "+strconv.Itoa(len(duplicates))+" duplicates", timestamp)
|
|
}
|
|
|
|
func filterDuplicates(objects []comparisonObject, duplicateIndices []int) []comparisonObject {
|
|
if len(objects) == 0 || len(duplicateIndices) == 0 {
|
|
return objects
|
|
}
|
|
timestamp := tools.LogTimestamp()
|
|
removed := 0
|
|
var tmp []comparisonObject
|
|
for index, object := range objects {
|
|
copyObject := true
|
|
for _, value := range duplicateIndices {
|
|
if index == value {
|
|
copyObject = false
|
|
break
|
|
}
|
|
}
|
|
if !copyObject {
|
|
removed++
|
|
continue
|
|
}
|
|
tmp = append(tmp, object)
|
|
}
|
|
log.DebugTimed("filtered "+strconv.Itoa(removed)+" duplicate track(s) from list", timestamp)
|
|
return tmp
|
|
}
|
|
|
|
func toComparisonObject(row *sql.Rows) (comparisonObject, error) {
|
|
var comparisonObject comparisonObject
|
|
var tmp string
|
|
row.Scan(&comparisonObject.id, &comparisonObject.path, &tmp, &comparisonObject.bitrate)
|
|
fingerprint, error := metadata.FingerprintFromString(tmp)
|
|
if error != nil {
|
|
log.Error("encountered an error parsing the audio fingerprint for file '" + comparisonObject.path + "' from 'string' to '[]int32'")
|
|
return comparisonObject, error
|
|
}
|
|
comparisonObject.fingerprint = fingerprint.Value
|
|
return comparisonObject, nil
|
|
}
|
|
|
|
func getDuplicate(object *comparisonObject, comparisonObject *comparisonObject) Duplicate {
|
|
score := metadata.CompareWith(object.fingerprint, comparisonObject.fingerprint)
|
|
if score < config.DuplicatesFingerprintThreshold() {
|
|
return Duplicate{}
|
|
}
|
|
var duplicate Duplicate
|
|
if object.bitrate > comparisonObject.bitrate {
|
|
duplicate = Duplicate{id: comparisonObject.id, good: object.path, bad: comparisonObject.path}
|
|
} else {
|
|
duplicate = Duplicate{id: object.id, good: comparisonObject.path, bad: object.path}
|
|
}
|
|
duplicate.score = score
|
|
return duplicate
|
|
}
|
|
|
|
func (duplicate *Duplicate) isValid() bool {
|
|
return duplicate.id > 0
|
|
}
|
|
|
|
// struct(s)
|
|
// Duplicate describes a pair of tracks that were detected as duplicates of
// each other.
type Duplicate struct {
	// id is the id of the track stored in bad; a value of 0 marks "no duplicate"
	id int
	// good is the path of the file to prefer, bad the path of the other file
	good, bad string
	// score is the fingerprint comparison score of the pair
	score float64
}
|
|
|
|
// comparisonObject holds the values of a single track that are needed to
// compare it with other tracks.
type comparisonObject struct {
	// id is the track's id as selected from the database
	id int
	// path is the track's file path
	path string
	// fingerprint is the parsed audio fingerprint
	fingerprint []int32
	// bitrate decides which file of a duplicate pair is kept as the good one
	bitrate int
}
|