// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package zstd

import (
	"math/bits"
)

// fseEntry is one entry in an FSE table.
type fseEntry struct {
	sym  uint8  // value that this entry records
	bits uint8  // number of bits to read to determine next state
	base uint16 // add those bits to this state to get the next state
}

// readFSE reads an FSE table from data starting at off.
// maxSym is the maximum symbol value.
// maxBits is the maximum number of bits permitted for symbols in the table.
// The FSE is written into table, which must be at least 1<<maxBits in size.
// This returns the number of bits in the FSE table and the new offset.
// RFC 4.1.1.
func (r *Reader) readFSE(data block, off, maxSym, maxBits int, table []fseEntry) (tableBits, roff int, err error) {
	br := r.makeBitReader(data, off)
	if err := br.moreBits(); err != nil {
		return 0, 0, err
	}

	accuracyLog := int(br.val(4)) + 5
	if accuracyLog > maxBits {
		return 0, 0, br.makeError("FSE accuracy log too large")
	}

	// The number of remaining probabilities, plus 1.
	// This determines the number of bits to be read for the next value.
	remaining := (1 << accuracyLog) + 1

	// The current difference between small and large values,
	// which depends on the number of remaining values.
	// Small values use 1 less bit.
	threshold := 1 << accuracyLog

	// The number of bits needed to compute threshold.
	bitsNeeded := accuracyLog + 1

	// The next character value.
	sym := 0

	// Whether the last count was 0.
	prev0 := false

	var norm [256]int16

	for remaining > 1 && sym <= maxSym {
		if err := br.moreBits(); err != nil {
			return 0, 0, err
		}

		if prev0 {
			// Previous count was 0, so there is a 2-bit
			// repeat flag. If the 2-bit flag is 0b11,
			// it adds 3 and then there is another repeat flag.
			zsym := sym
			for (br.bits & 0xfff) == 0xfff {
				zsym += 3 * 6
				br.bits >>= 12
				br.cnt -= 12
				if err := br.moreBits(); err != nil {
					return 0, 0, err
				}
			}
			for (br.bits & 3) == 3 {
				zsym += 3
				br.bits >>= 2
				br.cnt -= 2
				if err := br.moreBits(); err != nil {
					return 0, 0, err
				}
			}

			// We have at least 14 bits here,
			// no need to call moreBits

			zsym += int(br.val(2))

			if zsym > maxSym {
				return 0, 0, br.makeError("FSE symbol index overflow")
			}

			for ; sym < zsym; sym++ {
				norm[uint8(sym)] = 0
			}

			prev0 = false
			continue
		}

		max := (2*threshold - 1) - remaining
		var count int
		if int(br.bits&uint32(threshold-1)) < max {
			// A small value.
			count = int(br.bits & uint32((threshold - 1)))
			br.bits >>= bitsNeeded - 1
			br.cnt -= uint32(bitsNeeded - 1)
		} else {
			// A large value.
			count = int(br.bits & uint32((2*threshold - 1)))
			if count >= threshold {
				count -= max
			}
			br.bits >>= bitsNeeded
			br.cnt -= uint32(bitsNeeded)
		}

		count--
		if count >= 0 {
			remaining -= count
		} else {
			remaining--
		}
		if sym >= 256 {
			return 0, 0, br.makeError("FSE sym overflow")
		}
		norm[uint8(sym)] = int16(count)
		sym++

		prev0 = count == 0

		for remaining < threshold {
			bitsNeeded--
			threshold >>= 1
		}
	}

	if remaining != 1 {
		return 0, 0, br.makeError("too many symbols in FSE table")
	}

	for ; sym <= maxSym; sym++ {
		norm[uint8(sym)] = 0
	}

	br.backup()

	if err := r.buildFSE(off, norm[:maxSym+1], table, accuracyLog); err != nil {
		return 0, 0, err
	}

	return accuracyLog, int(br.off), nil
}

// buildFSE builds an FSE decoding table from a list of probabilities.
// The probabilities are in norm. next is scratch space. The number of bits
// in the table is tableBits.
func (r *Reader) buildFSE(off int, norm []int16, table []fseEntry, tableBits int) error {
	tableSize := 1 << tableBits
	highThreshold := tableSize - 1

	var next [256]uint16

	for i, n := range norm {
		if n >= 0 {
			next[uint8(i)] = uint16(n)
		} else {
			table[highThreshold].sym = uint8(i)
			highThreshold--
			next[uint8(i)] = 1
		}
	}

	pos := 0
	step := (tableSize >> 1) + (tableSize >> 3) + 3
	mask := tableSize - 1
	for i, n := range norm {
		for j := 0; j < int(n); j++ {
			table[pos].sym = uint8(i)
			pos = (pos + step) & mask
			for pos > highThreshold {
				pos = (pos + step) & mask
			}
		}
	}
	if pos != 0 {
		return r.makeError(off, "FSE count error")
	}

	for i := 0; i < tableSize; i++ {
		sym := table[i].sym
		nextState := next[sym]
		next[sym]++

		if nextState == 0 {
			return r.makeError(off, "FSE state error")
		}

		highBit := 15 - bits.LeadingZeros16(nextState)

		bits := tableBits - highBit
		table[i].bits = uint8(bits)
		table[i].base = (nextState << bits) - uint16(tableSize)
	}

	return nil
}

// fseBaselineEntry is an entry in an FSE baseline table.
// We use these for literal/match/length values.
// Those require mapping the symbol to a baseline value,
// and then reading zero or more bits and adding the value to the baseline.
// Rather than looking these up in separate tables,
// we convert the FSE table to an FSE baseline table.
type fseBaselineEntry struct {
	baseline uint32 // baseline for value that this entry represents
	basebits uint8  // number of bits to read to add to baseline
	bits     uint8  // number of bits to read to determine next state
	base     uint16 // add the bits to this base to get the next state
}

// Given a literal length code, we need to read a number of bits and
// add that to a baseline. For states 0 to 15 the baseline is the
// state and the number of bits is zero. RFC 3.1.1.3.2.1.1.

const literalLengthOffset = 16

var literalLengthBase = []uint32{
	16 | (1 << 24),
	18 | (1 << 24),
	20 | (1 << 24),
	22 | (1 << 24),
	24 | (2 << 24),
	28 | (2 << 24),
	32 | (3 << 24),
	40 | (3 << 24),
	48 | (4 << 24),
	64 | (6 << 24),
	128 | (7 << 24),
	256 | (8 << 24),
	512 | (9 << 24),
	1024 | (10 << 24),
	2048 | (11 << 24),
	4096 | (12 << 24),
	8192 | (13 << 24),
	16384 | (14 << 24),
	32768 | (15 << 24),
	65536 | (16 << 24),
}

// makeLiteralBaselineFSE converts the literal length fseTable to baselineTable.
func (r *Reader) makeLiteralBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error {
	for i, e := range fseTable {
		be := fseBaselineEntry{
			bits: e.bits,
			base: e.base,
		}
		if e.sym < literalLengthOffset {
			be.baseline = uint32(e.sym)
			be.basebits = 0
		} else {
			if e.sym > 35 {
				return r.makeError(off, "FSE baseline symbol overflow")
			}
			idx := e.sym - literalLengthOffset
			basebits := literalLengthBase[idx]
			be.baseline = basebits & 0xffffff
			be.basebits = uint8(basebits >> 24)
		}
		baselineTable[i] = be
	}
	return nil
}

// makeOffsetBaselineFSE converts the offset length fseTable to baselineTable.
func (r *Reader) makeOffsetBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error {
	for i, e := range fseTable {
		be := fseBaselineEntry{
			bits: e.bits,
			base: e.base,
		}
		if e.sym > 31 {
			return r.makeError(off, "FSE offset symbol overflow")
		}

		// The simple way to write this is
		//     be.baseline = 1 << e.sym
		//     be.basebits = e.sym
		// That would give us an offset value that corresponds to
		// the one described in the RFC. However, for offsets > 3
		// we have to subtract 3. And for offset values 1, 2, 3
		// we use a repeated offset.
		//
		// The baseline is always a power of 2, and is never 0,
		// so for those low values we will see one entry that is
		// baseline 1, basebits 0, and one entry that is baseline 2,
		// basebits 1. All other entries will have baseline >= 4
		// basebits >= 2.
		//
		// So we can check for RFC offset <= 3 by checking for
		// basebits <= 1. That means that we can subtract 3 here
		// and not worry about doing it in the hot loop.

		be.baseline = 1 << e.sym
		if e.sym >= 2 {
			be.baseline -= 3
		}
		be.basebits = e.sym
		baselineTable[i] = be
	}
	return nil
}

// Given a match length code, we need to read a number of bits and add
// that to a baseline. For states 0 to 31 the baseline is state+3 and
// the number of bits is zero. RFC 3.1.1.3.2.1.1.

const matchLengthOffset = 32

var matchLengthBase = []uint32{
	35 | (1 << 24),
	37 | (1 << 24),
	39 | (1 << 24),
	41 | (1 << 24),
	43 | (2 << 24),
	47 | (2 << 24),
	51 | (3 << 24),
	59 | (3 << 24),
	67 | (4 << 24),
	83 | (4 << 24),
	99 | (5 << 24),
	131 | (7 << 24),
	259 | (8 << 24),
	515 | (9 << 24),
	1027 | (10 << 24),
	2051 | (11 << 24),
	4099 | (12 << 24),
	8195 | (13 << 24),
	16387 | (14 << 24),
	32771 | (15 << 24),
	65539 | (16 << 24),
}

// makeMatchBaselineFSE converts the match length fseTable to baselineTable.
func (r *Reader) makeMatchBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error {
	for i, e := range fseTable {
		be := fseBaselineEntry{
			bits: e.bits,
			base: e.base,
		}
		if e.sym < matchLengthOffset {
			be.baseline = uint32(e.sym) + 3
			be.basebits = 0
		} else {
			if e.sym > 52 {
				return r.makeError(off, "FSE baseline symbol overflow")
			}
			idx := e.sym - matchLengthOffset
			basebits := matchLengthBase[idx]
			be.baseline = basebits & 0xffffff
			be.basebits = uint8(basebits >> 24)
		}
		baselineTable[i] = be
	}
	return nil
}

// predefinedLiteralTable is the predefined table to use for literal lengths.
// Generated from table in RFC 3.1.1.3.2.2.1.
// Checked by TestPredefinedTables.
var predefinedLiteralTable = [...]fseBaselineEntry{
	{0, 0, 4, 0}, {0, 0, 4, 16}, {1, 0, 5, 32},
	{3, 0, 5, 0}, {4, 0, 5, 0}, {6, 0, 5, 0},
	{7, 0, 5, 0}, {9, 0, 5, 0}, {10, 0, 5, 0},
	{12, 0, 5, 0}, {14, 0, 6, 0}, {16, 1, 5, 0},
	{20, 1, 5, 0}, {22, 1, 5, 0}, {28, 2, 5, 0},
	{32, 3, 5, 0}, {48, 4, 5, 0}, {64, 6, 5, 32},
	{128, 7, 5, 0}, {256, 8, 6, 0}, {1024, 10, 6, 0},
	{4096, 12, 6, 0}, {0, 0, 4, 32}, {1, 0, 4, 0},
	{2, 0, 5, 0}, {4, 0, 5, 32}, {5, 0, 5, 0},
	{7, 0, 5, 32}, {8, 0, 5, 0}, {10, 0, 5, 32},
	{11, 0, 5, 0}, {13, 0, 6, 0}, {16, 1, 5, 32},
	{18, 1, 5, 0}, {22, 1, 5, 32}, {24, 2, 5, 0},
	{32, 3, 5, 32}, {40, 3, 5, 0}, {64, 6, 4, 0},
	{64, 6, 4, 16}, {128, 7, 5, 32}, {512, 9, 6, 0},
	{2048, 11, 6, 0}, {0, 0, 4, 48}, {1, 0, 4, 16},
	{2, 0, 5, 32}, {3, 0, 5, 32}, {5, 0, 5, 32},
	{6, 0, 5, 32}, {8, 0, 5, 32}, {9, 0, 5, 32},
	{11, 0, 5, 32}, {12, 0, 5, 32}, {15, 0, 6, 0},
	{18, 1, 5, 32}, {20, 1, 5, 32}, {24, 2, 5, 32},
	{28, 2, 5, 32}, {40, 3, 5, 32}, {48, 4, 5, 32},
	{65536, 16, 6, 0}, {32768, 15, 6, 0}, {16384, 14, 6, 0},
	{8192, 13, 6, 0},
}

// predefinedOffsetTable is the predefined table to use for offsets.
// Generated from table in RFC 3.1.1.3.2.2.3.
// Checked by TestPredefinedTables.
var predefinedOffsetTable = [...]fseBaselineEntry{
	{1, 0, 5, 0}, {61, 6, 4, 0}, {509, 9, 5, 0},
	{32765, 15, 5, 0}, {2097149, 21, 5, 0}, {5, 3, 5, 0},
	{125, 7, 4, 0}, {4093, 12, 5, 0}, {262141, 18, 5, 0},
	{8388605, 23, 5, 0}, {29, 5, 5, 0}, {253, 8, 4, 0},
	{16381, 14, 5, 0}, {1048573, 20, 5, 0}, {1, 2, 5, 0},
	{125, 7, 4, 16}, {2045, 11, 5, 0}, {131069, 17, 5, 0},
	{4194301, 22, 5, 0}, {13, 4, 5, 0}, {253, 8, 4, 16},
	{8189, 13, 5, 0}, {524285, 19, 5, 0}, {2, 1, 5, 0},
	{61, 6, 4, 16}, {1021, 10, 5, 0}, {65533, 16, 5, 0},
	{268435453, 28, 5, 0}, {134217725, 27, 5, 0}, {67108861, 26, 5, 0},
	{33554429, 25, 5, 0}, {16777213, 24, 5, 0},
}

// predefinedMatchTable is the predefined table to use for match lengths.
// Generated from table in RFC 3.1.1.3.2.2.2.
// Checked by TestPredefinedTables.
var predefinedMatchTable = [...]fseBaselineEntry{
	{3, 0, 6, 0}, {4, 0, 4, 0}, {5, 0, 5, 32},
	{6, 0, 5, 0}, {8, 0, 5, 0}, {9, 0, 5, 0},
	{11, 0, 5, 0}, {13, 0, 6, 0}, {16, 0, 6, 0},
	{19, 0, 6, 0}, {22, 0, 6, 0}, {25, 0, 6, 0},
	{28, 0, 6, 0}, {31, 0, 6, 0}, {34, 0, 6, 0},
	{37, 1, 6, 0}, {41, 1, 6, 0}, {47, 2, 6, 0},
	{59, 3, 6, 0}, {83, 4, 6, 0}, {131, 7, 6, 0},
	{515, 9, 6, 0}, {4, 0, 4, 16}, {5, 0, 4, 0},
	{6, 0, 5, 32}, {7, 0, 5, 0}, {9, 0, 5, 32},
	{10, 0, 5, 0}, {12, 0, 6, 0}, {15, 0, 6, 0},
	{18, 0, 6, 0}, {21, 0, 6, 0}, {24, 0, 6, 0},
	{27, 0, 6, 0}, {30, 0, 6, 0}, {33, 0, 6, 0},
	{35, 1, 6, 0}, {39, 1, 6, 0}, {43, 2, 6, 0},
	{51, 3, 6, 0}, {67, 4, 6, 0}, {99, 5, 6, 0},
	{259, 8, 6, 0}, {4, 0, 4, 32}, {4, 0, 4, 48},
	{5, 0, 4, 16}, {7, 0, 5, 32}, {8, 0, 5, 32},
	{10, 0, 5, 32}, {11, 0, 5, 32}, {14, 0, 6, 0},
	{17, 0, 6, 0}, {20, 0, 6, 0}, {23, 0, 6, 0},
	{26, 0, 6, 0}, {29, 0, 6, 0}, {32, 0, 6, 0},
	{65539, 16, 6, 0}, {32771, 15, 6, 0}, {16387, 14, 6, 0},
	{8195, 13, 6, 0}, {4099, 12, 6, 0}, {2051, 11, 6, 0},
	{1027, 10, 6, 0},
}