﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Kinect_CameraControl
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Threading;
    using Microsoft.Kinect;
    using Microsoft.Speech.AudioFormat;
    using Microsoft.Speech.Recognition;

    public class SpeechRecognizer : IDisposable
    {

        #region Private State
        
        /// <summary>
        /// Control phrases dictionary. Maps each spoken phrase to the verb it stands for.
        /// The keys are the phrases added to the grammar in LoadGrammar and are the
        /// strings searched for in the recognized text by SreSpeechRecognized.
        /// </summary>
        private readonly Dictionary<string, WhatSaid> controlPhrases = new Dictionary<string, WhatSaid>
            {
                { "Up", new WhatSaid { Verb = Verbs.Up } },
                { "Down", new WhatSaid { Verb = Verbs.Down } },
                { "Middle", new WhatSaid { Verb = Verbs.Middle } },
                { "Good bye", new WhatSaid { Verb = Verbs.GoodBye } },
                { "Show skeleton", new WhatSaid { Verb = Verbs.ShowSkeleton } },
                { "Hide skeleton", new WhatSaid { Verb = Verbs.HideSkeleton } }
            };

        /// <summary>
        /// Speech recognizer engine
        /// </summary>
        private SpeechRecognitionEngine sre;

        /// <summary>
        /// Audio source
        /// </summary>
        private KinectAudioSource kinectAudioSource;

        /// <summary>
        /// Flag. Set once the recognizer has been disposed; tested by CheckDisposed.
        /// </summary>
        private bool isDisposed;

        /// <summary>
        /// Verbs associated with the control phrases.
        /// </summary>
        public enum Verbs
        {
            // Default value; also the verb reported when speech is rejected.
            None = 0,
            Up,
            Down,
            Middle,
            GoodBye,
            ShowSkeleton,
            HideSkeleton
        }

        /// <summary>
        /// Value stored for each entry of the control phrases dictionary.
        /// It currently carries only the verb associated with the phrase.
        /// </summary>
        private struct WhatSaid
        {
            // Verb associated with the phrase.
            public Verbs Verb;
        }


        #endregion Private State

        #region Ctor and events

        /// <summary>
        /// Class constructor. Looks up the Kinect speech recognizer, creates the
        /// recognition engine for it and loads the control-phrase grammar.
        /// Private: instances are obtained through <see cref="Create"/>.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// No matching Kinect speech recognizer is installed.
        /// </exception>
        private SpeechRecognizer()
        {
            RecognizerInfo ri = GetKinectRecognizer();
            if (ri == null)
            {
                // GetKinectRecognizer uses FirstOrDefault, so null means no recognizer matched.
                throw new InvalidOperationException("No Kinect speech recognizer for en-US is installed.");
            }

            SpeechRecognitionEngine engine = new SpeechRecognitionEngine(ri);
            try
            {
                this.LoadGrammar(engine);
            }
            catch
            {
                // Do not leak the engine when the grammar cannot be loaded.
                engine.Dispose();
                throw;
            }

            this.sre = engine;
        }

        /// <summary>
        /// Raised when a control phrase is recognized, and also (with Verbs.None) when speech is rejected.
        /// </summary>
        public event EventHandler<SaidSomethingEventArgs> SaidSomething;

        /// <summary>
        /// Event args for the SaidSomething event.
        /// </summary>
        public class SaidSomethingEventArgs : EventArgs
        {
            /// <summary>
            /// Verb of the control phrase that was matched; Verbs.None when speech was rejected.
            /// </summary>
            public Verbs Verb { get; set; }

            /// <summary>
            /// Text returned by the recognizer; set by the recognized-speech callback.
            /// </summary>
            public string Phrase { get; set; }

            /// <summary>
            /// Describes what was matched; the rejected-speech callback sets it to "?".
            /// </summary>
            public string Matched { get; set; }
        }

        #endregion Ctor and events

        #region Init/Dispose Speech Recognizer

        /// <summary>
        /// Gets or sets the echo cancellation mode of the Kinect audio source
        /// that was passed to Start.
        /// </summary>
        /// <exception cref="ObjectDisposedException">The recognizer has been disposed.</exception>
        /// <exception cref="InvalidOperationException">
        /// Start has not been called yet, so there is no audio source to query or configure.
        /// </exception>
        public EchoCancellationMode EchoCancellationMode
        {
            get
            {
                this.CheckDisposed();
                if (this.kinectAudioSource == null)
                {
                    // The audio source is only assigned in Start.
                    throw new InvalidOperationException("The recognizer has no audio source; call Start first.");
                }

                return this.kinectAudioSource.EchoCancellationMode;
            }

            set
            {
                this.CheckDisposed();
                if (this.kinectAudioSource == null)
                {
                    // The audio source is only assigned in Start.
                    throw new InvalidOperationException("The recognizer has no audio source; call Start first.");
                }

                this.kinectAudioSource.EchoCancellationMode = value;
            }
        }

        /// <summary>
        /// Factory for the recognizer.
        /// Construction can fail when the speech prerequisites are not installed; the
        /// failure is absorbed here so that callers only have to test the result for null.
        /// </summary>
        /// <returns>A new recognizer, or null when it could not be constructed.</returns>
        public static SpeechRecognizer Create()
        {
            try
            {
                return new SpeechRecognizer();
            }
            catch (Exception)
            {
                // Speech prerequisite isn't installed; the app is expected to cope with a null recognizer.
                return null;
            }
        }

        /// <summary>
        /// Start the recognizer: configures and starts the given Kinect audio source,
        /// feeds its stream to the speech engine and begins continuous asynchronous recognition.
        /// </summary>
        /// <param name="kinectSource">Kinect audio source to listen to. Must not be null.</param>
        /// <exception cref="ObjectDisposedException">The recognizer has been disposed.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="kinectSource"/> is null.</exception>
        public void Start(KinectAudioSource kinectSource)
        {
            this.CheckDisposed();

            if (kinectSource == null)
            {
                throw new ArgumentNullException("kinectSource");
            }

            // Stop() detaches the recognition handlers, so attach them again to make a
            // Start after a Stop raise events. Removing first keeps a single subscription
            // when the handlers are still attached from LoadGrammar.
            this.sre.SpeechRecognized -= this.SreSpeechRecognized;
            this.sre.SpeechRecognized += this.SreSpeechRecognized;
            this.sre.SpeechHypothesized -= this.SreSpeechHypothesized;
            this.sre.SpeechHypothesized += this.SreSpeechHypothesized;
            this.sre.SpeechRecognitionRejected -= this.SreSpeechRecognitionRejected;
            this.sre.SpeechRecognitionRejected += this.SreSpeechRecognitionRejected;

            this.kinectAudioSource = kinectSource;
            this.kinectAudioSource.AutomaticGainControlEnabled = false;
            this.kinectAudioSource.BeamAngleMode = BeamAngleMode.Adaptive;
            var kinectStream = this.kinectAudioSource.Start();

            // PCM, 16000 samples/s, 16 bits per sample, 1 channel, 32000 bytes/s, block align 2.
            this.sre.SetInputToAudioStream(
                kinectStream, new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));
            this.sre.RecognizeAsync(RecognizeMode.Multiple);
        }

        /// <summary>
        /// Stop the recognizer: stops the Kinect audio source (if one was supplied),
        /// cancels recognition and detaches the recognition event handlers from the engine.
        /// Safe to call when Start was never called.
        /// </summary>
        /// <exception cref="ObjectDisposedException">The recognizer has been disposed.</exception>
        public void Stop()
        {
            this.CheckDisposed();

            if (this.sre != null)
            {
                // The audio source is only assigned in Start; without this guard a Stop
                // on a recognizer that was never started throws NullReferenceException.
                if (this.kinectAudioSource != null)
                {
                    this.kinectAudioSource.Stop();
                }

                this.sre.RecognizeAsyncCancel();
                this.sre.RecognizeAsyncStop();

                this.sre.SpeechRecognized -= this.SreSpeechRecognized;
                this.sre.SpeechHypothesized -= this.SreSpeechHypothesized;
                this.sre.SpeechRecognitionRejected -= this.SreSpeechRecognitionRejected;
            }
        }

        /// <summary>
        /// Disposes the class. Since disposing the speech engine is slow, the engine
        /// itself is disposed on a thread-pool thread. Calling this more than once is a no-op.
        /// </summary>
        public void Dispose()
        {
            this.Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Disposes the class. Since disposing the speech engine is slow, the engine
        /// itself is disposed on a thread-pool thread.
        /// </summary>
        /// <param name="disposing">
        /// True when called from <see cref="Dispose()"/>; managed state is only released in that case.
        /// </param>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Usage", "CA2213:DisposableFieldsShouldBeDisposed", MessageId = "sre",
            Justification = "This is suppressed because FXCop does not see our threaded dispose.")]
        protected virtual void Dispose(bool disposing)
        {
            if (this.isDisposed)
            {
                // Already disposed. Going on would call Stop(), which throws
                // ObjectDisposedException once the flag is set.
                return;
            }

            if (disposing)
            {
                // Stop() uses the audio source, which only exists after Start.
                if (this.kinectAudioSource != null)
                {
                    this.Stop();
                }

                SpeechRecognitionEngine engine = this.sre;
                if (engine != null)
                {
                    this.sre = null;

                    // NOTE: The SpeechRecognitionEngine can take a long time to dispose
                    // so we will dispose it on a background thread
                    ThreadPool.QueueUserWorkItem(
                        delegate(object state)
                        {
                            IDisposable toDispose = state as IDisposable;
                            if (toDispose != null)
                            {
                                toDispose.Dispose();
                            }
                        },
                        engine);
                }

                this.isDisposed = true;
            }
        }

        #endregion Init/Dispose Speech Recognizer

        #region Methods

        /// <summary>
        /// Finds the installed speech recognizer whose additional info marks it as a
        /// Kinect recognizer ("Kinect" = "True") and whose culture is en-US.
        /// </summary>
        /// <returns>The first matching recognizer, or null when none is installed.</returns>
        private static RecognizerInfo GetKinectRecognizer()
        {
            foreach (RecognizerInfo candidate in SpeechRecognitionEngine.InstalledRecognizers())
            {
                // kinectFlag stays null when the recognizer has no "Kinect" entry.
                string kinectFlag;
                candidate.AdditionalInfo.TryGetValue("Kinect", out kinectFlag);

                bool isKinect = "True".Equals(kinectFlag, StringComparison.InvariantCultureIgnoreCase);
                if (isKinect && "en-US".Equals(candidate.Culture.Name, StringComparison.InvariantCultureIgnoreCase))
                {
                    return candidate;
                }
            }

            return null;
        }

        /// <summary>
        /// Guard used by the public members: throws when the recognizer has already been disposed.
        /// </summary>
        /// <exception cref="ObjectDisposedException">The recognizer has been disposed.</exception>
        private void CheckDisposed()
        {
            if (!this.isDisposed)
            {
                return;
            }

            throw new ObjectDisposedException("SpeechRecognizer");
        }

        /// <summary>
        /// Builds a grammar with one alternative per control phrase, loads it into the
        /// given engine and attaches the recognition callbacks. A more complex grammar
        /// can be programmed in this method.
        /// </summary>
        /// <param name="speechRecognitionEngine">Engine that receives the grammar and the event handlers.</param>
        private void LoadGrammar(SpeechRecognitionEngine speechRecognitionEngine)
        {
            // One alternative for every key of the control phrases dictionary.
            var phraseChoices = new Choices();
            foreach (string phraseText in this.controlPhrases.Keys)
            {
                phraseChoices.Add(phraseText);
            }

            var topLevelChoices = new Choices();
            topLevelChoices.Add(phraseChoices);

            // Build the grammar in the recognizer's own culture so that it works on
            // machines with any culture, not just en-us.
            var builder = new GrammarBuilder();
            builder.Culture = speechRecognitionEngine.RecognizerInfo.Culture;
            builder.Append(topLevelChoices);

            speechRecognitionEngine.LoadGrammar(new Grammar(builder));

            speechRecognitionEngine.SpeechRecognized += this.SreSpeechRecognized;
            speechRecognitionEngine.SpeechHypothesized += this.SreSpeechHypothesized;
            speechRecognitionEngine.SpeechRecognitionRejected += this.SreSpeechRecognitionRejected;
        }

        /// <summary>
        /// Callback. Called when the audio can't be recognized. Raises SaidSomething
        /// with Verbs.None and "?" as the matched text.
        /// </summary>
        /// <param name="sender">Source of the engine event (not used).</param>
        /// <param name="e">Rejection details (not used).</param>
        private void SreSpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            // Take a snapshot of the delegate: a subscriber may detach between the
            // null test and the call.
            EventHandler<SaidSomethingEventArgs> handler = this.SaidSomething;
            if (handler == null)
            {
                return;
            }

            // Report this instance as the sender, per the .NET event convention.
            handler(this, new SaidSomethingEventArgs { Verb = Verbs.None, Matched = "?" });
        }

        /// <summary>
        /// Callback. Called when the recognizer is not certain that the user said something but it is probable.
        /// Intentionally empty: hypotheses are ignored.
        /// </summary>
        /// <param name="sender">Source of the engine event (not used).</param>
        /// <param name="e">Hypothesis details (not used).</param>
        private void SreSpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
        {
            // Don't do anything
        }

        /// <summary>
        /// Callback. Called when something has been recognized. Results below the
        /// confidence threshold are ignored; otherwise the first control phrase
        /// contained in the recognized text is reported through SaidSomething.
        /// Nothing is raised when no control phrase is contained in the text.
        /// </summary>
        /// <param name="sender">Source of the engine event (not used).</param>
        /// <param name="e">Recognition result; its text and confidence are read.</param>
        private void SreSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            // Results with a lower confidence are discarded.
            const double MinimumConfidence = 0.3;

            // Take a snapshot of the delegate: a subscriber may detach between the
            // null test and the call.
            EventHandler<SaidSomethingEventArgs> handler = this.SaidSomething;
            if ((handler == null) || (e.Result.Confidence < MinimumConfidence))
            {
                return;
            }

            string recognizedText = e.Result.Text;

            // Look for a match in dictionary order, first match wins.
            foreach (var phrase in this.controlPhrases)
            {
                if (recognizedText.Contains(phrase.Key))
                {
                    var said = new SaidSomethingEventArgs
                    {
                        Verb = phrase.Value.Verb,
                        Phrase = recognizedText,
                        Matched = phrase.Key
                    };

                    // Report this instance as the sender, per the .NET event convention.
                    handler(this, said);
                    return;
                }
            }
        }

        #endregion Methods
    }
}
