package transformers

import (
	"fmt"
	"os"
	"strings"

	"github.com/johnkerl/miller/v6/pkg/cli"
	"github.com/johnkerl/miller/v6/pkg/lib"
	"github.com/johnkerl/miller/v6/pkg/mlrval"
	"github.com/johnkerl/miller/v6/pkg/types"
)

// Verb names for the two verbs implemented in this file, plus the default
// name of the output field that carries counts (overridable with -o).
const (
	verbNameCountDistinct      = "count-distinct"
	verbNameUniq               = "uniq"
	uniqDefaultOutputFieldName = "count"
)

// Setup records for the verbs implemented in this file. Each one pairs a verb
// name with its usage printer and its command-line parser.
var (
	// CountDistinctSetup describes the count-distinct verb.
	CountDistinctSetup = TransformerSetup{
		Verb:         verbNameCountDistinct,
		UsageFunc:    transformerCountDistinctUsage,
		ParseCLIFunc: transformerCountDistinctParseCLI,
		IgnoresInput: false,
	}

	// UniqSetup describes the uniq verb.
	UniqSetup = TransformerSetup{
		Verb:         verbNameUniq,
		UsageFunc:    transformerUniqUsage,
		ParseCLIFunc: transformerUniqParseCLI,
		IgnoresInput: false,
	}
)

// transformerCountDistinctUsage writes the help text for the count-distinct
// verb to o, one line at a time.
func transformerCountDistinctUsage(
	o *os.File,
) {
	helpLines := []string{
		fmt.Sprintf("Usage: %s %s [options]", "mlr", verbNameCountDistinct),
		"Prints number of records having distinct values for specified field names.",
		"Same as uniq -c.",
		"",
		"Options:",
		"-f {a,b,c}    Field names for distinct count.",
		"-x {a,b,c}    Field names to exclude for distinct count: use each record's others instead.",
		"-n            Show only the number of distinct values. Not compatible with -u.",
		fmt.Sprintf("-o {name}     Field name for output count. Default \"%s\".", uniqDefaultOutputFieldName),
		"              Ignored with -u.",
		"-u            Do unlashed counts for multiple field names. With -f a,b and",
		"              without -u, computes counts for distinct combinations of a",
		"              and b field values. With -f a,b and with -u, computes counts",
		"              for distinct a field values and counts for distinct b field",
		"              values separately.",
	}
	for _, helpLine := range helpLines {
		fmt.Fprintln(o, helpLine)
	}
}

// transformerCountDistinctParseCLI parses the count-distinct verb and its
// flags out of args, starting at args[*pargi] (the verb name itself).
//
// On success *pargi is advanced past the verb's flags. When doConstruct is
// false only parsing/validation is done and (nil, nil) is returned; otherwise
// the configured transformer is returned. -h/--help prints usage to stdout and
// returns cli.ErrHelpRequested. Unknown flags, missing field names, and the
// -n/-u combination are reported as errors.
func transformerCountDistinctParseCLI(
	pargi *int,
	argc int,
	args []string,
	_ *cli.TOptions,
	doConstruct bool, // false for first pass of CLI-parse, true for second pass
) (RecordTransformer, error) {

	// Skip the verb name from the current spot in the mlr command line
	argi := *pargi
	verb := args[argi]
	argi++

	// Parse local flags
	var fieldNames []string
	invertFieldNames := false
	showNumDistinctOnly := false
	outputFieldName := uniqDefaultOutputFieldName
	doLashed := true

	var err error
	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
		opt := args[argi]
		if !strings.HasPrefix(opt, "-") {
			break // No more flag options to process
		}
		if opt == "--" {
			break // All transformers must do this so main-flags can follow verb-flags
		}
		argi++

		switch opt {
		case "-h", "--help":
			transformerCountDistinctUsage(os.Stdout)
			return nil, cli.ErrHelpRequested

		case "-g", "-f":
			fieldNames, err = cli.VerbGetStringArrayArg(verb, opt, args, &argi, argc)
			if err != nil {
				return nil, err
			}
			// Last flag wins: an earlier -x must not turn these names into an
			// exclusion list.
			invertFieldNames = false

		case "-x":
			fieldNames, err = cli.VerbGetStringArrayArg(verb, opt, args, &argi, argc)
			if err != nil {
				return nil, err
			}
			invertFieldNames = true

		case "-n":
			showNumDistinctOnly = true

		case "-o":
			outputFieldName, err = cli.VerbGetStringArg(verb, opt, args, &argi, argc)
			if err != nil {
				return nil, err
			}

		case "-u":
			doLashed = false

		default:
			return nil, cli.VerbErrorf(verb, "option \"%s\" not recognized", opt)
		}
	}

	if fieldNames == nil {
		return nil, cli.VerbErrorf(verb, "-f or -x field names required")
	}
	if !doLashed && showNumDistinctOnly {
		return nil, cli.VerbErrorf(verb, "-n is not compatible with -u")
	}

	*pargi = argi
	if !doConstruct { // All transformers must do this for main command-line parsing
		return nil, nil
	}

	// count-distinct is uniq with counts always shown and never in
	// whole-record (-a) mode.
	const showCounts = true
	const uniqifyEntireRecords = false

	transformer, err := NewTransformerUniq(
		fieldNames,
		invertFieldNames,
		showCounts,
		showNumDistinctOnly,
		outputFieldName,
		doLashed,
		uniqifyEntireRecords,
	)
	if err != nil {
		return nil, err
	}

	return transformer, nil
}

// transformerUniqUsage writes the help text for the uniq verb to o, one line
// at a time.
func transformerUniqUsage(
	o *os.File,
) {
	helpLines := []string{
		fmt.Sprintf("Usage: %s %s [options]", "mlr", verbNameUniq),
		"Prints distinct values for specified field names. With -c, same as",
		"count-distinct. For uniq, -f is a synonym for -g.",
		"",
		"Options:",
		"-g {d,e,f}    Group-by-field names for uniq counts.",
		"-x {a,b,c}    Field names to exclude for uniq: use each record's others instead.",
		"-c            Show repeat counts in addition to unique values.",
		"-n            Show only the number of distinct values.",
		fmt.Sprintf("-o {name}     Field name for output count. Default \"%s\".", uniqDefaultOutputFieldName),
		"-a            Output each unique record only once. Incompatible with -g.",
		"              With -c, produces unique records, with repeat counts for each.",
		"              With -n, produces only one record which is the unique-record count.",
		"              With neither -c nor -n, produces unique records.",
	}
	for _, helpLine := range helpLines {
		fmt.Fprintln(o, helpLine)
	}
}

// transformerUniqParseCLI parses the uniq verb and its flags out of args,
// starting at args[*pargi] (the verb name itself).
//
// On success *pargi is advanced past the verb's flags. When doConstruct is
// false only parsing/validation is done and (nil, nil) is returned; otherwise
// the configured transformer is returned. -h/--help prints usage to stdout and
// returns cli.ErrHelpRequested. Errors are returned for unknown flags, for -a
// combined with field names, for -a with both -c and -n, and for missing
// field names when -a is absent.
func transformerUniqParseCLI(
	pargi *int,
	argc int,
	args []string,
	_ *cli.TOptions,
	doConstruct bool, // false for first pass of CLI-parse, true for second pass
) (RecordTransformer, error) {

	// Skip the verb name from the current spot in the mlr command line
	argi := *pargi
	verb := args[argi]
	argi++

	// Parse local flags
	var fieldNames []string
	invertFieldNames := false
	showCounts := false
	showNumDistinctOnly := false
	outputFieldName := uniqDefaultOutputFieldName
	uniqifyEntireRecords := false

	var err error
	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
		opt := args[argi]
		if !strings.HasPrefix(opt, "-") {
			break // No more flag options to process
		}
		if opt == "--" {
			break // All transformers must do this so main-flags can follow verb-flags
		}
		argi++

		switch opt {
		case "-h", "--help":
			transformerUniqUsage(os.Stdout)
			return nil, cli.ErrHelpRequested

		case "-g", "-f":
			fieldNames, err = cli.VerbGetStringArrayArg(verb, opt, args, &argi, argc)
			if err != nil {
				return nil, err
			}
			// Last flag wins: an earlier -x must not turn these names into an
			// exclusion list.
			invertFieldNames = false

		case "-x":
			fieldNames, err = cli.VerbGetStringArrayArg(verb, opt, args, &argi, argc)
			if err != nil {
				return nil, err
			}
			invertFieldNames = true

		case "-c":
			showCounts = true

		case "-n":
			showNumDistinctOnly = true

		case "-o":
			outputFieldName, err = cli.VerbGetStringArg(verb, opt, args, &argi, argc)
			if err != nil {
				return nil, err
			}

		case "-a":
			uniqifyEntireRecords = true

		default:
			return nil, cli.VerbErrorf(verb, "option \"%s\" not recognized", opt)
		}
	}

	if uniqifyEntireRecords {
		if fieldNames != nil {
			return nil, cli.VerbErrorf(verb, "-a (uniqify entire records) is incompatible with -g/-x")
		}
		if showCounts && showNumDistinctOnly {
			return nil, cli.VerbErrorf(verb, "-c and -n are mutually exclusive with -a")
		}
	} else if fieldNames == nil {
		return nil, cli.VerbErrorf(verb, "-g or -x field names required")
	}

	*pargi = argi
	if !doConstruct { // All transformers must do this for main command-line parsing
		return nil, nil
	}

	// uniq has no unlashed (-u) mode; that is a count-distinct-only flag.
	const doLashed = true

	transformer, err := NewTransformerUniq(
		fieldNames,
		invertFieldNames,
		showCounts,
		showNumDistinctOnly,
		outputFieldName,
		doLashed,
		uniqifyEntireRecords,
	)
	if err != nil {
		// Return a literal nil so the caller never sees a non-nil interface
		// wrapping a nil *TransformerUniq.
		return nil, err
	}

	return transformer, nil
}

// TransformerUniq holds the configuration and accumulated state shared by the
// uniq and count-distinct verbs. Which of the maps below are populated depends
// on the handler selected in NewTransformerUniq.
type TransformerUniq struct {
	// Field names given on the command line (grouping names, or names to
	// exclude when invertFieldNames is true).
	fieldNames []string
	// Set form of fieldNames, passed to GetKeysExcept when invertFieldNames
	// is true.
	fieldNamesSet map[string]bool
	// When true, grouping uses each record's keys other than fieldNames.
	invertFieldNames bool
	// When true, transformWithCounts adds the count field to its output.
	showCounts bool
	// Name of the output field carrying counts.
	outputFieldName string

	// Example:
	// Input is:
	//   a=1,b=2,c=3
	//   a=4,b=5,c=6
	// Uniquing on fields ["a","b"]
	// uniqifiedRecordCounts:
	//   '{"a":1,"b":2,"c":3}' => 1
	//   '{"a":4,"b":5,"c":6}' => 1
	// uniqifiedRecords:
	//   '{"a":1,"b":2,"c":3}' => {"a":1,"b":2,"c":3}
	//   '{"a":4,"b":5,"c":6}' => {"a":4,"b":5,"c":6}
	// keysByGroup:
	//  "1,2" -> ["a","b"]
	//  "4,5" -> ["a","b"]
	// countsByGroup:
	//  "1,2" -> 1
	//  "4,5" -> 1
	// valuesByGroup:
	//  "1,2" -> [1,2]
	//  "4,5" -> [4,5]
	// unlashedCounts:
	//   "a" => "1" => 1
	//   "a" => "4" => 1
	//   ...
	// unlashedCountValues:
	//   "a" => "1" => 1
	//   "a" => "4" => 4
	uniqifiedRecordCounts *lib.OrderedMap[int64]                           // record-as-string -> counts
	uniqifiedRecords      *lib.OrderedMap[*types.RecordAndContext]         // record-as-string -> records
	keysByGroup           *lib.OrderedMap[[]string]                        // grouping key -> field names used for grouping
	countsByGroup         *lib.OrderedMap[int64]                           // grouping key -> count
	valuesByGroup         *lib.OrderedMap[[]*mlrval.Mlrval]                // grouping key -> array of values
	unlashedCounts        *lib.OrderedMap[*lib.OrderedMap[int64]]          // field name -> string field value -> count
	unlashedCountValues   *lib.OrderedMap[*lib.OrderedMap[*mlrval.Mlrval]] // field name -> string field value -> typed field value

	// Per-record handler chosen in NewTransformerUniq and invoked by Transform.
	recordTransformerFunc RecordTransformerFunc
}

// NewTransformerUniq builds a TransformerUniq with empty accumulators and
// selects the per-record handler from the flag combination. Precedence, first
// match wins:
//
//   - uniqifyEntireRecords: showCounts, then showNumDistinctOnly, else plain
//   - !doLashed
//   - showNumDistinctOnly
//   - showCounts
//   - otherwise, streaming output without counts
//
// The returned error is always nil.
func NewTransformerUniq(
	fieldNames []string,
	invertFieldNames bool,
	showCounts bool,
	showNumDistinctOnly bool,
	outputFieldName string,
	doLashed bool,
	uniqifyEntireRecords bool,
) (*TransformerUniq, error) {

	tr := &TransformerUniq{
		fieldNames:       fieldNames,
		fieldNamesSet:    lib.StringListToSet(fieldNames),
		invertFieldNames: invertFieldNames,
		showCounts:       showCounts,
		outputFieldName:  outputFieldName,

		uniqifiedRecordCounts: lib.NewOrderedMap[int64](),
		uniqifiedRecords:      lib.NewOrderedMap[*types.RecordAndContext](),
		keysByGroup:           lib.NewOrderedMap[[]string](),
		countsByGroup:         lib.NewOrderedMap[int64](),
		valuesByGroup:         lib.NewOrderedMap[[]*mlrval.Mlrval](),
		unlashedCounts:        lib.NewOrderedMap[*lib.OrderedMap[int64]](),
		unlashedCountValues:   lib.NewOrderedMap[*lib.OrderedMap[*mlrval.Mlrval]](),
	}

	switch {
	case uniqifyEntireRecords && showCounts:
		tr.recordTransformerFunc = tr.transformUniqifyEntireRecordsShowCounts
	case uniqifyEntireRecords && showNumDistinctOnly:
		tr.recordTransformerFunc = tr.transformUniqifyEntireRecordsShowNumDistinctOnly
	case uniqifyEntireRecords:
		tr.recordTransformerFunc = tr.transformUniqifyEntireRecords
	case !doLashed:
		tr.recordTransformerFunc = tr.transformUnlashed
	case showNumDistinctOnly:
		tr.recordTransformerFunc = tr.transformNumDistinctOnly
	case showCounts:
		tr.recordTransformerFunc = tr.transformWithCounts
	default:
		tr.recordTransformerFunc = tr.transformWithoutCounts
	}

	return tr, nil
}

// getFieldNamesForGrouping returns the field names to group inrec by: the
// configured names as-is, or, when invertFieldNames is set, the record's keys
// except the configured ones.
func (tr *TransformerUniq) getFieldNamesForGrouping(
	inrec *mlrval.Mlrmap,
) []string {
	if !tr.invertFieldNames {
		return tr.fieldNames
	}
	return inrec.GetKeysExcept(tr.fieldNamesSet)
}

// Transform is the per-record entry point. It first calls
// HandleDefaultDownstreamDone with the two downstream-done channels, then
// passes the record (or end-of-stream marker) to the handler selected in
// NewTransformerUniq.
func (tr *TransformerUniq) Transform(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)

	handle := tr.recordTransformerFunc
	handle(
		inrecAndContext,
		outputRecordsAndContexts,
		inputDownstreamDoneChannel,
		outputDownstreamDoneChannel,
	)
}

// transformUniqifyEntireRecordsShowCounts emits each distinct record once,
// with its repeat count prepended under the output field name. Counts are
// only known once all input is seen, so nothing is emitted until end of
// stream.
func (tr *TransformerUniq) transformUniqifyEntireRecordsShowCounts(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	if inrecAndContext.EndOfStream {
		// Emit the retained first occurrences, in first-seen order.
		for entry := tr.uniqifiedRecords.Head; entry != nil; entry = entry.Next {
			retained := entry.Value
			count := mlrval.FromInt(tr.uniqifiedRecordCounts.Get(entry.Key))
			retained.Record.PrependReference(tr.outputFieldName, count)
			*outputRecordsAndContexts = append(*outputRecordsAndContexts, retained)
		}

		*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext) // end-of-stream marker
		return
	}

	// The record's string form serves as its identity.
	key := inrecAndContext.Record.String()
	if seenCount, seen := tr.uniqifiedRecordCounts.GetWithCheck(key); seen {
		tr.uniqifiedRecordCounts.Put(key, seenCount+1)
		return
	}

	// First time seen: start the count and keep a copy for output later.
	tr.uniqifiedRecordCounts.Put(key, int64(1))
	tr.uniqifiedRecords.Put(key, inrecAndContext.Copy())
}

// transformUniqifyEntireRecordsShowNumDistinctOnly emits a single record at
// end of stream whose only field, named by the output field name, is the
// number of distinct records seen.
func (tr *TransformerUniq) transformUniqifyEntireRecordsShowNumDistinctOnly(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	if inrecAndContext.EndOfStream {
		distinctCount := mlrval.FromInt(tr.uniqifiedRecordCounts.FieldCount)
		summary := mlrval.NewMlrmapAsRecord()
		summary.PutReference(tr.outputFieldName, distinctCount)
		*outputRecordsAndContexts = append(
			*outputRecordsAndContexts,
			types.NewRecordAndContext(summary, &inrecAndContext.Context),
		)

		*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext) // end-of-stream marker
		return
	}

	// Only membership matters here, so the stored count stays at 1.
	key := inrecAndContext.Record.String()
	if tr.uniqifiedRecordCounts.Has(key) {
		return
	}
	tr.uniqifiedRecordCounts.Put(key, int64(1))
}

// transformUniqifyEntireRecords passes a record through the first time its
// string form is seen and drops later repeats.
func (tr *TransformerUniq) transformUniqifyEntireRecords(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	if inrecAndContext.EndOfStream {
		*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext) // end-of-stream marker
		return
	}

	key := inrecAndContext.Record.String()
	if tr.uniqifiedRecordCounts.Has(key) {
		return // repeat of an already-emitted record
	}

	tr.uniqifiedRecordCounts.Put(key, int64(1))
	*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext)
}

// transformUnlashed counts distinct values separately for each grouping field
// rather than for combinations of fields. At end of stream it emits one
// record per (field, value) pair with fields "field", "value", and "count".
func (tr *TransformerUniq) transformUnlashed(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	if inrecAndContext.EndOfStream {
		for perField := tr.unlashedCounts.Head; perField != nil; perField = perField.Next {
			name := perField.Key
			typedValues := tr.unlashedCountValues.Get(name)
			for perValue := perField.Value.Head; perValue != nil; perValue = perValue.Next {
				outrec := mlrval.NewMlrmapAsRecord()
				outrec.PutReference("field", mlrval.FromString(name))
				outrec.PutCopy("value", typedValues.Get(perValue.Key))
				outrec.PutReference("count", mlrval.FromInt(perValue.Value))
				*outputRecordsAndContexts = append(
					*outputRecordsAndContexts,
					types.NewRecordAndContext(outrec, &inrecAndContext.Context),
				)
			}
		}

		*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext) // end-of-stream marker
		return
	}

	inrec := inrecAndContext.Record
	for _, name := range tr.getFieldNamesForGrouping(inrec) {
		// Create both per-field maps on first sight of the field name, even
		// if this record turns out not to have the field.
		counts, known := tr.unlashedCounts.GetWithCheck(name)
		if !known {
			counts = lib.NewOrderedMap[int64]()
			tr.unlashedCounts.Put(name, counts)
			tr.unlashedCountValues.Put(name, lib.NewOrderedMap[*mlrval.Mlrval]())
		}

		value := inrec.Get(name)
		if value == nil {
			continue
		}

		valueString := value.String()
		if counts.Has(valueString) {
			counts.Put(valueString, counts.Get(valueString)+1)
			continue
		}

		// First occurrence: keep a typed copy of the value for output.
		counts.Put(valueString, int64(1))
		tr.unlashedCountValues.Get(name).Put(valueString, value.Copy())
	}
}

// transformNumDistinctOnly tallies distinct grouping keys and, at end of
// stream, emits a single record holding how many distinct keys were seen.
//
// The count is written under tr.outputFieldName so that -o is honored here
// as it is in the whole-record (-a -n) handler.
func (tr *TransformerUniq) transformNumDistinctOnly(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	if inrecAndContext.EndOfStream {
		outrec := mlrval.NewMlrmapAsRecord()
		outrec.PutReference(
			tr.outputFieldName,
			mlrval.FromInt(tr.countsByGroup.FieldCount),
		)
		*outputRecordsAndContexts = append(*outputRecordsAndContexts, types.NewRecordAndContext(outrec, &inrecAndContext.Context))

		*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext) // end-of-stream marker
		return
	}

	inrec := inrecAndContext.Record

	// NOTE(review): ok is presumably false when the record lacks one of the
	// grouping fields; such records are skipped.
	groupingKey, ok := inrec.GetSelectedValuesJoined(tr.getFieldNamesForGrouping(inrec))
	if !ok {
		return
	}

	if iCount, present := tr.countsByGroup.GetWithCheck(groupingKey); present {
		tr.countsByGroup.Put(groupingKey, iCount+1)
	} else {
		tr.countsByGroup.Put(groupingKey, int64(1))
	}
}

// transformWithCounts accumulates a count per distinct grouping key and, at
// end of stream, emits one record per key holding the grouping fields plus,
// when showCounts is set, the count under the output field name.
func (tr *TransformerUniq) transformWithCounts(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	if inrecAndContext.EndOfStream {
		for group := tr.countsByGroup.Head; group != nil; group = group.Next {
			outrec := mlrval.NewMlrmapAsRecord()
			values := tr.valuesByGroup.Get(group.Key)
			names := tr.keysByGroup.Get(group.Key)

			// Names are stored per group since, with inverted field names,
			// they are derived from each record's own keys.
			for i := range names {
				outrec.PutCopy(names[i], values[i])
			}

			if tr.showCounts {
				outrec.PutReference(tr.outputFieldName, mlrval.FromInt(group.Value))
			}
			*outputRecordsAndContexts = append(
				*outputRecordsAndContexts,
				types.NewRecordAndContext(outrec, &inrecAndContext.Context),
			)
		}

		*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext) // end-of-stream marker
		return
	}

	inrec := inrecAndContext.Record
	names := tr.getFieldNamesForGrouping(inrec)

	groupingKey, values, ok := inrec.GetSelectedValuesAndJoined(names)
	if !ok {
		return
	}

	if count, seen := tr.countsByGroup.GetWithCheck(groupingKey); seen {
		tr.countsByGroup.Put(groupingKey, count+1)
		return
	}

	// First record of this group: remember its values and field names.
	tr.countsByGroup.Put(groupingKey, int64(1))
	tr.valuesByGroup.Put(groupingKey, values)
	tr.keysByGroup.Put(groupingKey, names)
}

// transformWithoutCounts emits, for the first record of each distinct
// grouping key, a new record containing only the grouping fields. Later
// records with the same key produce no output, so this handler streams.
func (tr *TransformerUniq) transformWithoutCounts(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	if inrecAndContext.EndOfStream {
		*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext) // end-of-stream marker
		return
	}

	inrec := inrecAndContext.Record

	// Resolve the grouping field names once per record; with inverted field
	// names each call goes through inrec.GetKeysExcept.
	fieldNamesForGrouping := tr.getFieldNamesForGrouping(inrec)

	// NOTE(review): ok is presumably false when the record lacks one of the
	// grouping fields; such records are skipped.
	groupingKey, selectedValues, ok := inrec.GetSelectedValuesAndJoined(fieldNamesForGrouping)
	if !ok {
		return
	}

	iCount, present := tr.countsByGroup.GetWithCheck(groupingKey)
	if present {
		tr.countsByGroup.Put(groupingKey, iCount+1)
		return
	}

	tr.countsByGroup.Put(groupingKey, int64(1))
	tr.valuesByGroup.Put(groupingKey, selectedValues)

	outrec := mlrval.NewMlrmapAsRecord()
	for i, fieldNameForGrouping := range fieldNamesForGrouping {
		outrec.PutCopy(
			fieldNameForGrouping,
			selectedValues[i],
		)
	}

	*outputRecordsAndContexts = append(*outputRecordsAndContexts, types.NewRecordAndContext(outrec, &inrecAndContext.Context))
}
