I'm sure there's an easier way to do this, but I've been maintaining my own class to do the job for a while now and keep needing to find it in my code library every time I use it - I figure it might be useful to others:-
using System; using System.Collections.Generic; using System.Text; using System.Data; namespace CSVParser { /// <summary> /// Simo's stock CSV parser class /// </summary> public class CSVParser { /// <summary> /// Takes the CSV files contents and creates a data table from it /// </summary> /// <param name="csvFileContents">The entire contents of a CSV file split by line (rather than the filename)</param> /// <param name="validateLayout">Validate the file to check conformance of the layout with expected standards</param> /// <param name="topRowIsHeader">The top row is the header row, take it as the column names rather than data</param> /// <returns>A datatable</returns> public static DataTable DataTableFromCSV(string[] csvFileContents, bool validateLayout, bool topRowIsHeader) { DataTable outputDataTable = new DataTable(); List<string[]> csvFileRows = new List<string[]>(); List<string> columns = new List<string>(); #region Pre-parse the file int columnCount = 0; bool gotHeaders = false; int rowNumber = 0; foreach (string line in csvFileContents) { string[] parts = ExtractCSVElements(line); //initial set of header names but only if the top row is header option is set if (!gotHeaders && topRowIsHeader) { columns.AddRange(parts); columnCount = parts.Length; gotHeaders = true; } else { if (parts.Length > 0) { csvFileRows.Add(parts); } } if (parts.Length > columnCount) { //if set to validate the layout and that the first row contains the headers then we know any extra columns are wrong if (validateLayout && gotHeaders) { throw new Exception("Row has extra data columns: " + rowNumber.ToString()); } //new column detected mid-data-set! for (int i = columnCount; i < parts.Length; i++) { columns.Add("Column " + i.ToString()); } columnCount = parts.Length; } //we always ignore zero length rows as the last line can be empty if (parts.Length < columnCount && parts.Length != 0) { if (validateLayout) { throw new Exception("Row has missing data columns: " + rowNumber.ToString()); } } rowNumber++; } #endregion #region Build the data tables layout and data //columns foreach (string column in columns) { outputDataTable.Columns.Add(column); } //rows foreach (string[] row in csvFileRows) { outputDataTable.Rows.Add(row); } #endregion return outputDataTable; } /// <summary> /// Extract the elements of a line from a CSV file with support for quotes /// </summary> /// <param name="line">The data to parse</param> private static string[] ExtractCSVElements(string line) { List<string> elements = new List<string>(); //do the initial split, based on commas string[] firstParts = line.Split(','); //reparse it StringBuilder temporaryPart = new StringBuilder(""); bool inside = false; foreach (string part in firstParts) { //are we inside a quoted part, or did we just find a quote? if (!inside //we're not inside && (!part.Contains("\"") //and we don't contain a quote || ( //or we're handling a single quote enclosed element part.StartsWith("\"") //we start with a quote && part.EndsWith("\"") //and we end with a quote) ) ) { elements.Add(part); } else { if (inside) { //we are still inside a quote... temporaryPart.Append("," + part); if (part.Contains("\"")) { //then we are also at the end! elements.Add(temporaryPart.Replace("\"", "").ToString()); //add the part minus its quotes to the array //all done! inside = false; } } else { //else we just found a quote! inside = true; temporaryPart = new StringBuilder(part); } } } return elements.ToArray(); } } }
Permalink