packagemainimport("database/sql""fmt""log""os"_"github.com/duckdb/duckdb-go/v2")funcmain(){iflen(os.Args)!=2{fmt.Fprintf(os.Stderr,"Usage: %s <database_file>\n",os.Args[0])fmt.Fprintf(os.Stderr,"Example: %s honeypot.db\n",os.Args[0])os.Exit(1)}databaseFile:=os.Args[1]// Open database connectiondb,err:=sql.Open("duckdb",databaseFile)iferr!=nil{log.Fatalf("failed to open database: %v",err)}deferdb.Close()// First, count how many duplicate entries exist (excluding the ones we'll keep)varduplicateCountinterr=db.QueryRow(` SELECT COUNT(*) - (
SELECT COUNT(*)
FROM (
SELECT DISTINCT time, remote_addr
FROM honeypot_events
)
)
FROM honeypot_events
`).Scan(&duplicateCount)iferr!=nil{log.Fatalf("failed to count duplicates: %v",err)}ifduplicateCount==0{fmt.Println("No duplicate entries found (where timestamp and remote_addr are the same)")return}// Count how many unique duplicate groups existvarduplicateGroupsinterr=db.QueryRow(` SELECT COUNT(*)
FROM (
SELECT time, remote_addr
FROM honeypot_events
GROUP BY time, remote_addr
HAVING COUNT(*) > 1
)
`).Scan(&duplicateGroups)iferr!=nil{log.Fatalf("failed to count duplicate groups: %v",err)}// Ask for confirmationfmt.Printf("Found %d duplicate entries across %d duplicate groups\n",duplicateCount,duplicateGroups)fmt.Print("Are you sure you want to delete these duplicate entries? (yes/no): ")varconfirmationstringfmt.Scanln(&confirmation)ifconfirmation!="yes"{fmt.Println("Deletion cancelled.")return}// Delete duplicates, keeping the entry with the minimum ID for each (time, remote_addr) combination// This uses a subquery to identify which IDs to keep (MIN(id) per group), then deletes all othersresult,err:=db.Exec(` DELETE FROM honeypot_events
WHERE (time, remote_addr) IN (
SELECT time, remote_addr
FROM honeypot_events
GROUP BY time, remote_addr
HAVING COUNT(*) > 1
)
AND id NOT IN (
SELECT MIN(id)
FROM honeypot_events
GROUP BY time, remote_addr
)
`)iferr!=nil{log.Fatalf("failed to delete duplicates: %v",err)}rowsAffected,err:=result.RowsAffected()iferr!=nil{log.Fatalf("failed to get rows affected: %v",err)}fmt.Printf("Successfully deleted %d duplicate entries\n",rowsAffected)}