Thursday, 17 November 2011

Excel (2010) Automation with C#

Introduction
When implementing and supporting systems you don't always stick with the cleanest and most elegant solution and one of my common tricks is downright ugly. At least twice a week I find it's necessary to update a set of data from a spreadsheet where it doesn't always make sense to bulk import it into a temporary table. Generally I find the fastest solution is just to use the string concatenation in Excel  and write something like this.

="UPDATE Employee Set Email = '" & A1 & "' Where Username = '" & A2 &"'"

The problem with this is

  • It can be quite fiddly trying to work out where you made a small mistake and missed a ' or ".
  • It's not very usable.
Idea
I wanted to write something which would be similar to how parameters can be bound to a dynamic piece of SQL e.g.

UPDATE Employee Set Email = {1} Where Username = {2}

Then from the type of value in the cell infer whether or not it needs to be wrapped in quotes.

The immediate solution I thought of was the Visual Studio Tools for Office (VSTO) and I'd write an Addin in C#.

Considering I've never written an Addin this was a pretty stupid thought for a first idea but I decided to carry on.

Stumbling Block One
I should have realised this sooner, all communication with Excel must be done through the COM Primary Interop Assembly interface. Oh good, yet another area I have no experience with. This actually turned out now to be a major issue until a little while later.

First Impressions
This is pretty easy, register an event handler so that I can see when a cell changes and work with it.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using Excel = Microsoft.Office.Interop.Excel;
using Office = Microsoft.Office.Core;
using Microsoft.Office.Tools.Excel;


namespace ExcelSQLFormatter
{
    public partial class ThisAddIn
    {
        private void ThisAddIn_Startup(object sender, System.EventArgs e)
        {
            this.Application.SheetChange += new Excel.AppEvents_SheetChangeEventHandler(Application_SheetChange);
        }

        void Application_SheetChange(object Sh, Excel.Range Target)
        {
            Debug.WriteLine("AppEvents_SheetChangeEventHandler");
        }
           private void ThisAddIn_Shutdown(object sender, System.EventArgs e)
        {
        }

        #region VSTO generated code

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InternalStartup()
        {
            this.Startup += new System.EventHandler(ThisAddIn_Startup);
            this.Shutdown += new System.EventHandler(ThisAddIn_Shutdown);
        }

        #endregion
    }
}

Stumbling Block Two
The main problem I currently have and haven't solved is that when I read the value from a cell it's either a System.String or a System.Double which is fine until I have a date value.

Using .NumberFormat is not reliable as it returns m/d/yyyy for the date '31/12/2011'

Interim Code Solution
The following is where I'm leaving this experiment as I don't want to spend more time on something that will save me less than 10 minutes a week. It's been an interesting look at how you can automate Excel but what it's made really clear is that the documentation available is poor especially around PIA.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Globalization;
using System.Threading;
using System.Text;
using System.Xml.Linq;
using Excel = Microsoft.Office.Interop.Excel;
using Office = Microsoft.Office.Core;
using Microsoft.Office.Tools.Excel;
using System.Text.RegularExpressions;

namespace ExcelSQLFormatter
{
    public partial class ThisAddIn
    {
        string[] letters = new string[27] { "", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U","V","W","X","Y","Z" };
        private void ThisAddIn_Startup(object sender, System.EventArgs e)
        {
            this.Application.SheetChange += new Excel.AppEvents_SheetChangeEventHandler(Application_SheetChange);
        }

        void Application_SheetChange(object Sh, Excel.Range Target)
        {
            //Stop handling events while I change
            this.Application.SheetChange -= new Excel.AppEvents_SheetChangeEventHandler(Application_SheetChange);
            HandleSQL(Sh, Target);
            //Restore handling events
            this.Application.SheetChange += new Excel.AppEvents_SheetChangeEventHandler(Application_SheetChange);
        }

        private void HandleSQL(object Sh, Excel.Range Target)
        {
            /*A SQL String to format will be passed in the following format
             * {Si} String in column i
             * {Ni} Number in column i
             * e.g. Update Employee Set Username={S1} where EmployeeID={N2}
             */
            string FindTagPattern = @"{[S|N][0-9]*}";
            string FindColumnPattern = @"[0-9]";

            string type;
            int col;
            int row;
            string val;
            string sql;
            if (Target.Value2.GetType().ToString() == "System.String")
            {
                sql = (string)Target.Value2;
            }
            else if (Target.Value2.GetType().ToString() == "System.Double")
            {
                double d = Target.Value2;
                sql = d.ToString();
            }
            else
            {
                sql = Target.Value2.GetType().ToString();
            }

            MatchCollection matches = Regex.Matches(sql, FindTagPattern);
            foreach (Match m in matches)
            {
                row = Target.Row;
                col = int.Parse(Regex.Match(m.Value, FindColumnPattern).Value);
                type = m.Value.Substring(1, 1);
                Excel.Range r = Globals.ThisAddIn.Application.get_Range(letters[col]+row.ToString(), missing);
                if (r.Value2.GetType().ToString() == "System.String")
                {
                    val = (string)r.Value2;
                }
                else if (r.Value2.GetType().ToString() == "System.Double")
                {
                    double d = r.Value2;
                    val = d.ToString();
                }
                else
                {
                    val = r.Value2.GetType().ToString();
                }
                if (type == "N")
                {
                    sql = sql.Replace(m.Value, val);
                }
                else if (type == "S")
                {
                    sql = sql.Replace(m.Value, "'" + val + "'");
                }
            }
            Target.Value2 = sql;
        }

           private void ThisAddIn_Shutdown(object sender, System.EventArgs e)
        {
        }

        #region VSTO generated code

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InternalStartup()
        {
            this.Startup += new System.EventHandler(ThisAddIn_Startup);
            this.Shutdown += new System.EventHandler(ThisAddIn_Shutdown);
        }

        #endregion
    }
}