/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tuple

import (
	"testing"

	"github.com/stretchr/testify/assert"

	"github.com/apache/datasketches-go/theta"
)

// TestHashtable_HashStringAndScreen runs HashStringAndScreen over a table of
// string inputs, theta values and seeds.
//
// Every case asserts that isEmpty is false after the call. Cases with a
// non-empty wantErrMsg expect an error containing that message; all other
// cases expect a nil error and a non-zero hash.
func TestHashtable_HashStringAndScreen(t *testing.T) {
	testCases := []struct {
		name       string
		data       string
		theta      uint64
		seed       uint64
		wantErrMsg string // empty means the call is expected to succeed
	}{
		{
			name:       "normal string with max theta",
			data:       "hello world",
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "empty string",
			data:       "",
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "string with special characters",
			data:       "test@#$%^&*()",
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "unicode string",
			data:       "가나다라마바사",
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "with low theta (likely filtered)",
			data:       "test",
			theta:      1,
			seed:       theta.DefaultSeed,
			wantErrMsg: "hash exceeds theta",
		},
		{
			name:       "different seed",
			data:       "test",
			theta:      theta.MaxTheta,
			seed:       99999,
			wantErrMsg: "",
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ht := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 1.0, tc.theta, tc.seed, true)
			hash, err := ht.HashStringAndScreen(tc.data)

			assert.False(t, ht.isEmpty)
			if tc.wantErrMsg != "" {
				assert.ErrorContains(t, err, tc.wantErrMsg)
				return
			}

			// A success case must not report an error; checking only the
			// hash would let an unexpected error slip through unnoticed.
			assert.NoError(t, err)
			assert.NotZero(t, hash, "Expected non-zero hash for data: %s", tc.data)
		})
	}
}

// TestHashtable_HashStringAndScreenConsistency hashes the same string twice on
// one table and expects both calls to succeed with identical hashes.
func TestHashtable_HashStringAndScreenConsistency(t *testing.T) {
	const input = "test"
	table := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 1.0, theta.MaxTheta, theta.DefaultSeed, true)

	first, firstErr := table.HashStringAndScreen(input)
	assert.NoError(t, firstErr)

	second, secondErr := table.HashStringAndScreen(input)
	assert.NoError(t, secondErr)

	assert.Equal(t, first, second, "Same string should produce same hash")
}

// TestHashtable_HashInt32AndScreen runs HashInt32AndScreen over a table of
// int32 inputs (including zero and both int32 extremes), theta values and
// seeds.
//
// Every case asserts that isEmpty is false after the call. Cases with a
// non-empty wantErrMsg expect an error containing that message; all other
// cases expect a nil error and a non-zero hash.
func TestHashtable_HashInt32AndScreen(t *testing.T) {
	testCases := []struct {
		name       string
		data       int32
		theta      uint64
		seed       uint64
		wantErrMsg string // empty means the call is expected to succeed
	}{
		{
			name:       "positive integer",
			data:       12345,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "negative integer",
			data:       -12345,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "zero",
			data:       0,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "max int32",
			data:       2147483647,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "min int32",
			data:       -2147483648,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "with very low theta (likely filtered)",
			data:       12345,
			theta:      1,
			seed:       theta.DefaultSeed,
			wantErrMsg: "hash exceeds theta",
		},
		{
			name:       "different seed",
			data:       12345,
			theta:      theta.MaxTheta,
			seed:       99999,
			wantErrMsg: "",
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ht := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 1.0, tc.theta, tc.seed, true)
			hash, err := ht.HashInt32AndScreen(tc.data)

			assert.False(t, ht.isEmpty)

			if tc.wantErrMsg != "" {
				assert.ErrorContains(t, err, tc.wantErrMsg)
				return
			}

			// A success case must not report an error; checking only the
			// hash would let an unexpected error slip through unnoticed.
			assert.NoError(t, err)
			assert.NotZero(t, hash, "Expected non-zero hash for data: %d", tc.data)
		})
	}
}

// TestHashtable_HashInt32AndScreenConsistency hashes the same int32 twice on
// one table and expects both calls to succeed with identical hashes.
func TestHashtable_HashInt32AndScreenConsistency(t *testing.T) {
	const input int32 = 42
	table := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 1.0, theta.MaxTheta, theta.DefaultSeed, true)

	first, firstErr := table.HashInt32AndScreen(input)
	assert.NoError(t, firstErr)

	second, secondErr := table.HashInt32AndScreen(input)
	assert.NoError(t, secondErr)

	assert.Equal(t, first, second, "Same int32 should produce same hash")
}

// TestHashtable_HashInt64AndScreen runs HashInt64AndScreen over a table of
// int64 inputs (including zero and both int64 extremes), theta values and
// seeds.
//
// Every case asserts that isEmpty is false after the call. Cases with a
// non-empty wantErrMsg expect an error containing that message; all other
// cases expect a nil error and a non-zero hash.
func TestHashtable_HashInt64AndScreen(t *testing.T) {
	testCases := []struct {
		name       string
		data       int64
		theta      uint64
		seed       uint64
		wantErrMsg string // empty means the call is expected to succeed
	}{
		{
			name:       "positive integer",
			data:       1234567890,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "negative integer",
			data:       -1234567890,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "zero",
			data:       0,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "max int64",
			data:       9223372036854775807,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "min int64",
			data:       -9223372036854775808,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "large positive value",
			data:       9876543210123456,
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "with very low theta (likely filtered)",
			data:       1234567890,
			theta:      1,
			seed:       theta.DefaultSeed,
			wantErrMsg: "hash exceeds theta",
		},
		{
			name:       "different seed",
			data:       1234567890,
			theta:      theta.MaxTheta,
			seed:       55555,
			wantErrMsg: "",
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ht := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 1.0, tc.theta, tc.seed, true)
			hash, err := ht.HashInt64AndScreen(tc.data)

			assert.False(t, ht.isEmpty)

			if tc.wantErrMsg != "" {
				assert.ErrorContains(t, err, tc.wantErrMsg)
				return
			}

			// A success case must not report an error; checking only the
			// hash would let an unexpected error slip through unnoticed.
			assert.NoError(t, err)
			assert.NotZero(t, hash, "Expected non-zero hash for data: %d", tc.data)
		})
	}
}

// TestHashtable_HashInt64AndScreenConsistency hashes the same int64 twice on
// one table and expects both calls to succeed with identical hashes.
func TestHashtable_HashInt64AndScreenConsistency(t *testing.T) {
	const input int64 = 123456789
	table := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 1.0, theta.MaxTheta, theta.DefaultSeed, true)

	first, firstErr := table.HashInt64AndScreen(input)
	assert.NoError(t, firstErr)

	second, secondErr := table.HashInt64AndScreen(input)
	assert.NoError(t, secondErr)

	assert.Equal(t, first, second, "Same int64 should produce same hash")
}

// TestHashtable_HashBytesAndScreen runs HashBytesAndScreen over a table of
// byte slices (empty, single byte, all zeros, all 0xFF, 1000 zero bytes, ...),
// theta values and seeds.
//
// Every case asserts that isEmpty is false after the call. Cases with a
// non-empty wantErrMsg expect an error containing that message; all other
// cases expect a nil error and a non-zero hash.
func TestHashtable_HashBytesAndScreen(t *testing.T) {
	testCases := []struct {
		name       string
		data       []byte
		theta      uint64
		seed       uint64
		wantErrMsg string // empty means the call is expected to succeed
	}{
		{
			name:       "normal byte array",
			data:       []byte{1, 2, 3, 4, 5},
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "empty byte array",
			data:       []byte{},
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "single byte",
			data:       []byte{42},
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "byte array from string",
			data:       []byte("hello world"),
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "byte array with zeros",
			data:       []byte{0, 0, 0, 0},
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "byte array with max values",
			data:       []byte{255, 255, 255, 255},
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "large byte array",
			data:       make([]byte, 1000),
			theta:      theta.MaxTheta,
			seed:       theta.DefaultSeed,
			wantErrMsg: "",
		},
		{
			name:       "with very low theta (likely filtered)",
			data:       []byte{1, 2, 3, 4, 5},
			theta:      100,
			seed:       theta.DefaultSeed,
			wantErrMsg: "hash exceeds theta",
		},
		{
			name:       "different seed",
			data:       []byte{1, 2, 3, 4, 5},
			theta:      theta.MaxTheta,
			seed:       77777,
			wantErrMsg: "",
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ht := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 1.0, tc.theta, tc.seed, true)
			hash, err := ht.HashBytesAndScreen(tc.data)

			assert.False(t, ht.isEmpty)

			if tc.wantErrMsg != "" {
				assert.ErrorContains(t, err, tc.wantErrMsg)
				return
			}

			// A success case must not report an error; checking only the
			// hash would let an unexpected error slip through unnoticed.
			assert.NoError(t, err)
			assert.NotZero(t, hash, "Expected non-zero hash for data: %v", tc.data)
		})
	}
}

// TestHashtable_HashBytesAndScreenConsistency hashes two separately allocated
// byte slices with equal contents on one table and expects both calls to
// succeed with identical hashes.
func TestHashtable_HashBytesAndScreenConsistency(t *testing.T) {
	// Build a fresh slice per call so the two inputs never share storage.
	payload := func() []byte { return []byte{1, 2, 3, 4, 5} }
	table := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 1.0, theta.MaxTheta, theta.DefaultSeed, true)

	first, firstErr := table.HashBytesAndScreen(payload())
	assert.NoError(t, firstErr)

	second, secondErr := table.HashBytesAndScreen(payload())
	assert.NoError(t, secondErr)

	assert.Equal(t, first, second, "Same byte array should produce same hash")
}

// TestHashTable_Find checks Find against three table states:
//
//  1. empty table: the key is absent and Find reports ErrKeyNotFound;
//  2. key stored at the index returned by the first lookup: Find succeeds and
//     returns that same index;
//  3. every slot overwritten with other hashes: Find reports
//     ErrKeyNotFoundAndNoEmptySlots.
//
// The table fields are written directly instead of going through Insert so
// that only Find is exercised.
func TestHashTable_Find(t *testing.T) {
	sketch := newHashtable[*float64Summary](2, 4, theta.ResizeX1, 1.0, theta.MaxTheta, theta.DefaultSeed, true)

	e := entry[*float64Summary]{
		Hash: uint64(12345),
	}

	// Look the key up in an empty table.
	index, err := sketch.Find(e.Hash)
	assert.ErrorIs(t, err, ErrKeyNotFound)

	// Store the entry by hand at the index reported by the failed lookup.
	sketch.entries[index] = e
	sketch.numEntries++

	// Find the inserted key.
	index2, err := sketch.Find(e.Hash)
	assert.NoError(t, err)
	assert.Equal(t, index, index2)

	// Fill every slot with hashes 1000, 1001, ... so that the table is full
	// and no slot holds e.Hash any more.
	size := 1 << sketch.lgCurSize
	for i := 0; i < size; i++ {
		sketch.entries[i] = entry[*float64Summary]{
			Hash: uint64(i + 1000),
		}
	}
	sketch.numEntries = uint32(size)

	// Only the error is checked here, so the returned index is discarded
	// rather than stored in a variable that is never read again.
	_, err = sketch.Find(e.Hash)
	assert.ErrorIs(t, err, ErrKeyNotFoundAndNoEmptySlots)
}

// TestHashtable_Insert covers Insert in three scenarios: a single insert that
// leaves the table size untouched, a run of inserts that makes lgCurSize
// grow, and a run of inserts that ends with theta being lowered (treated by
// this test as the sign of a rebuild).
func TestHashtable_Insert(t *testing.T) {
	t.Run("Without resizing & rebuilding", func(t *testing.T) {
		table := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 1.0, theta.MaxTheta, theta.DefaultSeed, true)
		want := entry[*float64Summary]{Hash: uint64(12345)}

		// The key is not present yet; Find hands back the slot to use.
		slot, findErr := table.Find(want.Hash)
		assert.ErrorIs(t, findErr, ErrKeyNotFound)

		table.Insert(slot, want)
		assert.Equal(t, 1, int(table.numEntries))

		// A second lookup must now locate the stored entry.
		foundAt, lookupErr := table.Find(want.Hash)
		assert.NoError(t, lookupErr)
		assert.Equal(t, table.entries[foundAt], want)
	})

	t.Run("With resizing", func(t *testing.T) {
		startLgSize, nominalLgSize := uint8(2), uint8(4)
		table := newHashtable[*float64Summary](startLgSize, nominalLgSize, theta.ResizeX2, 1.0, theta.MaxTheta, theta.DefaultSeed, true)
		lgSizeBefore := table.lgCurSize

		// Ten inserts are enough to trigger a resize.
		total := 10
		var added []entry[*float64Summary]
		for offset := 0; offset < total; offset++ {
			candidate := entry[*float64Summary]{Hash: uint64(offset + 1000)}
			// Only keys that are not already stored get inserted.
			if slot, err := table.Find(candidate.Hash); err != nil {
				table.Insert(slot, candidate)
				added = append(added, candidate)
			}
		}

		assert.Greater(t, table.lgCurSize, lgSizeBefore, "Table should have been resized")
		assert.Equal(t, total, len(added), "Should have inserted all keys")

		// Every inserted entry must still be reachable after the resize.
		for _, want := range added {
			slot, err := table.Find(want.Hash)
			assert.NoError(t, err)
			assert.Equal(t, want, table.entries[slot], "Key value should match")
		}
	})

	t.Run("With rebuilding", func(t *testing.T) {
		nominalLgSize, startLgSize := uint8(3), uint8(4)
		table := newHashtable[*float64Summary](startLgSize, nominalLgSize, theta.ResizeX2, 1.0, theta.MaxTheta, theta.DefaultSeed, true)
		nominalCount := uint32(1) << nominalLgSize

		attempts := 100
		var added []entry[*float64Summary]
		rebuilt := false

		for offset := 0; offset < attempts; offset++ {
			candidate := entry[*float64Summary]{Hash: uint64(offset + 1000)}

			slot, err := table.Find(candidate.Hash)
			if err == nil {
				// Already stored; nothing to insert.
				continue
			}
			if slot == -1 {
				// Table is full, cannot insert more.
				break
			}

			thetaBefore := table.theta
			table.Insert(slot, candidate)
			added = append(added, candidate)

			if table.theta >= thetaBefore {
				// Theta did not drop, so no rebuild happened on this insert.
				continue
			}

			// A drop in theta is how this test recognises a rebuild.
			rebuilt = true
			assert.Equal(t, nominalCount, table.numEntries, "After rebuild, entries should equal nominal size")
			assert.Less(t, table.theta, theta.MaxTheta, "Theta should decrease after rebuild")
			break
		}

		assert.True(t, rebuilt, "Rebuild should have occurred")

		// Count how many of the inserted entries can still be found.
		stillPresent := 0
		for _, want := range added {
			slot, err := table.Find(want.Hash)
			if err != nil || slot < 0 {
				continue
			}
			if table.entries[slot] == want {
				stillPresent++
			}
		}

		assert.Greater(t, stillPresent, 0, "Some entries should still be accessible after rebuild")
	})
}

// TestHashtable_Trim checks Trim in two situations: with more entries than
// the nominal size (numEntries must come down to the nominal size and theta
// must end up below theta.MaxTheta) and with fewer entries than the nominal
// size (numEntries and theta must stay as they were).
//
// Entries are written straight into the table, bypassing Insert, so the
// entry count can be pushed past the nominal size before Trim runs.
func TestHashtable_Trim(t *testing.T) {
	t.Run("rebuild", func(t *testing.T) {
		nominalLgSize, startLgSize := uint8(3), uint8(5)
		table := newHashtable[*float64Summary](startLgSize, nominalLgSize, theta.ResizeX2, 1.0, theta.MaxTheta, theta.DefaultSeed, true)

		// Store 20 distinct hashes, which is more than the nominal size.
		total := 20
		for offset := 0; offset < total; offset++ {
			candidate := entry[*float64Summary]{Hash: uint64(offset + 5000)}
			if slot, err := table.Find(candidate.Hash); err != nil {
				table.entries[slot] = candidate
				table.numEntries++
			}
		}

		countBefore := table.numEntries
		nominalCount := uint32(1) << nominalLgSize
		assert.Greater(t, countBefore, nominalCount, "numEntries should exceed nominal size before Trim")

		table.Trim()

		assert.Equal(t, nominalCount, table.numEntries, "After Trim, numEntries should equal nominal size")
		assert.Less(t, table.theta, theta.MaxTheta, "Theta should decrease after Trim")
	})

	t.Run("no op", func(t *testing.T) {
		nominalLgSize, startLgSize := uint8(4), uint8(4)
		table := newHashtable[*float64Summary](startLgSize, nominalLgSize, theta.ResizeX2, 1.0, theta.MaxTheta, theta.DefaultSeed, true)

		// Store 5 distinct hashes, which is fewer than the nominal size.
		total := 5
		for offset := 0; offset < total; offset++ {
			candidate := entry[*float64Summary]{Hash: uint64(offset + 6000)}
			if slot, err := table.Find(candidate.Hash); err != nil {
				table.entries[slot] = candidate
				table.numEntries++
			}
		}

		countBefore, thetaBefore := table.numEntries, table.theta
		nominalCount := uint32(1) << nominalLgSize
		assert.Less(t, countBefore, nominalCount, "numEntries should be less than nominal size")

		table.Trim()

		assert.Equal(t, countBefore, table.numEntries, "numEntries should not change when less than nominal size")
		assert.Equal(t, thetaBefore, table.theta, "Theta should not change when entries <= nominal size")
	})
}

// TestHashtable_Reset puts two entries into a table by hand, calls Reset, and
// expects isEmpty to be true, numEntries to be zero, every slot to hold the
// zero entry, and theta to equal startingThetaFromP of the table's p.
func TestHashtable_Reset(t *testing.T) {
	table := newHashtable[*float64Summary](4, 4, theta.ResizeX1, 0.5, theta.MaxTheta, theta.DefaultSeed, false)

	// Populate two slots directly and mark the table as non-empty.
	seeded := []struct {
		slot int
		hash uint64
	}{
		{slot: 0, hash: uint64(100)},
		{slot: 5, hash: uint64(200)},
	}
	for _, s := range seeded {
		table.entries[s.slot] = entry[*float64Summary]{Hash: s.hash}
	}
	table.numEntries = 2
	table.isEmpty = false

	table.Reset()

	assert.True(t, table.isEmpty)
	assert.Zero(t, table.numEntries)

	// Verify all entries are zero.
	for i := range table.entries {
		assert.Zero(t, table.entries[i], "entry at index %d should be zero after reset", i)
	}

	wantTheta := startingThetaFromP(table.p)
	assert.Equal(t, wantTheta, table.theta, "theta should be %d after reset", wantTheta)
}
