CodePaste Logo
New Snippet New Snippet Recent Snippets Recent Snippets My Snippets My Snippets Web Code Search Snippets Search
Sign inor Register
Language: VB.NET

CSVHandler VB.NET

1926 Views
Copy Code Show/Hide Line Numbers
Imports System
Imports System.Collections
Imports System.IO
Imports System.Text
 
Namespace Handlers
 
    ''' <summary>
    ''' A data-reader style interface for reading Csv (and otherwise-char-separated) files.
    ''' </summary>
    Public Class CSVHandler
        Implements IDisposable
 
#Region "Private variables"
 
        Private stream As Stream
        Private reader As StreamReader
        Private m_separator As Char
 
#End Region
 
#Region "Constructors"
 
        ''' <summary>
        ''' Creates a new Csv reader for the given stream.
        ''' </summary>
        ''' <param name="s">The stream to read the CSV from.</param>
        Public Sub New(ByVal s As Stream)
            Me.New(s, Nothing, ","c)
        End Sub
 
        ''' <summary>
        ''' Creates a new reader for the given stream and separator.
        ''' </summary>
        ''' <param name="s">The stream to read the separator from.</param>
        ''' <param name="separator">The field separator character</param>
        Public Sub New(ByVal s As Stream, ByVal separator As Char)
            Me.New(s, Nothing, separator)
        End Sub
 
        ''' <summary>
        ''' Creates a new Csv reader for the given stream and encoding.
        ''' </summary>
        ''' <param name="s">The stream to read the CSV from.</param>
        ''' <param name="enc">The encoding used.</param>
        Public Sub New(ByVal s As Stream, ByVal enc As Encoding)
            Me.New(s, enc, ","c)
        End Sub
 
        ''' <summary>
        ''' Creates a new reader for the given stream, encoding and separator character.
        ''' </summary>
        ''' <param name="s">The stream to read the data from.</param>
        ''' <param name="enc">The encoding used.</param>
        ''' <param name="separator">The separator character between the fields</param>
        Public Sub New(ByVal s As Stream, ByVal enc As Encoding, ByVal separator As Char)
 
            Me.m_separator = separator
            Me.stream = s
            If Not s.CanRead Then
                Throw New CSVHandlerException("Could not read the given data stream!")
            End If
            reader = If((enc IsNot Nothing), New StreamReader(s, enc), New StreamReader(s))
        End Sub
 
        ''' <summary>
        ''' Creates a new Csv reader for the given text file path.
        ''' </summary>
        ''' <param name="filename">The name of the file to be read.</param>
        Public Sub New(ByVal filename As String)
            Me.New(filename, Nothing, ","c)
        End Sub
 
        ''' <summary>
        ''' Creates a new reader for the given text file path and separator character.
        ''' </summary>
        ''' <param name="filename">The name of the file to be read.</param>
        ''' <param name="separator">The field separator character</param>
        Public Sub New(ByVal filename As String, ByVal separator As Char)
            Me.New(filename, Nothing, separator)
        End Sub
 
        ''' <summary>
        ''' Creates a new Csv reader for the given text file path and encoding.
        ''' </summary>
        ''' <param name="filename">The name of the file to be read.</param>
        ''' <param name="enc">The encoding used.</param>
        Public Sub New(ByVal filename As String, ByVal enc As Encoding)
            Me.New(filename, enc, ","c)
        End Sub
 
        ''' <summary>
        ''' Creates a new reader for the given text file path, encoding and field separator.
        ''' </summary>
        ''' <param name="filename">The name of the file to be read.</param>
        ''' <param name="enc">The encoding used.</param>
        ''' <param name="separator">The field separator character.</param>
        Public Sub New(ByVal filename As String, ByVal enc As Encoding, ByVal separator As Char)
            Me.New(New FileStream(filename, FileMode.Open), enc, separator)
        End Sub
 
#End Region
 
#Region "Properties"
 
        ''' <summary>
        ''' The separator character for the fields. Comma for normal CSV.
        ''' </summary>
        Public Property Separator() As Char
            Get
                Return m_separator
            End Get
            Set(ByVal value As Char)
                m_separator = value
            End Set
        End Property
 
#End Region
 
#Region "Parsing"
 
        ''' <summary>
        ''' Returns the fields for the next row of data (or null if at eof)
        ''' </summary>
        ''' <returns>A string array of fields or null if at the end of file.</returns>
        Public Function GetCsvLine() As String()
 
            Dim data As String = reader.ReadLine()
            If data Is Nothing Then
                Return Nothing
            End If
            If data.Length = 0 Then
                Return New String(-1) {}
            End If
 
            Dim result As New ArrayList()
 
            ParseCsvFields(result, data)
 
            Return DirectCast(result.ToArray(GetType(String)), String())
        End Function
 
        ' Parses the fields and pushes the fields into the result arraylist
        Private Sub ParseCsvFields(ByVal result As ArrayList, ByVal data As String)
 
            Dim pos As Integer = -1
            While pos < data.Length
                result.Add(ParseCsvField(data, pos))
            End While
        End Sub
 
        ' Parses the field at the given position of the data, modified pos to match
        ' the first unparsed position and returns the parsed field
        Private Function ParseCsvField(ByVal data As String, ByRef startSeparatorPosition As Integer) As String
 
            If startSeparatorPosition = data.Length - 1 Then
                startSeparatorPosition += 1
                ' The last field is empty
                Return ""
            End If
 
            Dim fromPos As Integer = startSeparatorPosition + 1
 
            ' Determine if this is a quoted field
            If data(fromPos) = """"c Then
                ' If we're at the end of the string, let's consider this a field that
                ' only contains the quote
                If fromPos = data.Length - 1 Then
                    fromPos += 1
                    Return """"
                End If
 
                ' Otherwise, return a string of appropriate length with double quotes collapsed
                ' Note that FSQ returns data.Length if no single quote was found
                Dim nextSingleQuote As Integer = FindSingleQuote(data, fromPos + 1)
                startSeparatorPosition = nextSingleQuote + 1
                Return data.Substring(fromPos + 1, nextSingleQuote - fromPos - 1).Replace("""""", """")
            End If
 
            ' The field ends in the next separator or EOL
            Dim nextSeparator As Integer = data.IndexOf(m_separator, fromPos)
            If nextSeparator = -1 Then
                startSeparatorPosition = data.Length
                Return data.Substring(fromPos)
            Else
                startSeparatorPosition = nextSeparator
                Return data.Substring(fromPos, nextSeparator - fromPos)
            End If
        End Function
 
        ' Returns the index of the next single quote mark in the string 
        ' (starting from startFrom)
        Private Shared Function FindSingleQuote(ByVal data As String, ByVal startFrom As Integer) As Integer
 
            Dim i As Integer = startFrom - 1
            While System.Threading.Interlocked.Increment(i) < data.Length
                If data(i) = """"c Then
                    ' If this is a double quote, bypass the chars
                    If i < data.Length - 1 AndAlso data(i + 1) = """"c Then
                        i += 1
                        Continue While
                    Else
                        Return i
                    End If
                End If
            End While
            ' If no quote found, return the end value of i (data.Length)
            Return i
        End Function
 
        Public Shared Function DataTableFromCSV(ByVal Filename As String) As DataTable
            Dim sLine As String
            sLine = File.ReadAllText(Filename)
            Dim dtData As New DataTable
            Dim separator As Char = ","
            Dim ms As New MemoryStream(Encoding.Unicode.GetBytes(sLine))
            Using csv As New CSVHandler(ms, Encoding.Unicode, separator)
                Dim fields As String()
                Dim result As New StringBuilder()
                Dim lines As Integer = 0
                While (InlineAssignHelper(fields, csv.GetCsvLine())) IsNot Nothing
                    If lines = 0 Then
                        For Each field As String In fields
                            Dim dcCol As New DataColumn
                            dcCol.ColumnName = field
                            dtData.Columns.Add(dcCol)
                        Next
                    Else
                        dtData.LoadDataRow(fields, LoadOption.Upsert)
                    End If
                    lines += 1
                End While
            End Using
            Return dtData
        End Function
 
        Private Shared Function InlineAssignHelper(Of T)(ByRef target As T, ByVal value As T) As T
            target = value
            Return value
        End Function
 
#End Region
 
#Region "Dispose"
 
        ''' <summary>
        ''' Disposes the reader. The underlying stream is closed.
        ''' </summary>
        Public Sub Dispose() Implements System.IDisposable.Dispose
            ' Closing the reader closes the underlying stream, too
            If reader IsNot Nothing Then
                reader.Close()
            ElseIf stream IsNot Nothing Then
                stream.Close()
            End If
            ' In case we failed before the reader was constructed
            GC.SuppressFinalize(Me)
        End Sub
 
#End Region
 
 
    End Class
 
 
    ''' <summary>
    ''' Exception class for CSVHandler exceptions.
    ''' </summary>
    <Serializable()> _
    Public Class CSVHandlerException
        Inherits ApplicationException
 
        ''' <summary>
        ''' Constructs a new CSVHandlerException.
        ''' </summary>
        Public Sub New()
            Me.New("The CSV Handler encountered an error.")
        End Sub
 
        ''' <summary>
        ''' Constructs a new exception with the given message.
        ''' </summary>
        ''' <param name="message">The exception message.</param>
        Public Sub New(ByVal message As String)
            MyBase.New(message)
        End Sub
 
        ''' <summary>
        ''' Constructs a new exception with the given message and the inner exception.
        ''' </summary>
        ''' <param name="message">The exception message.</param>
        ''' <param name="inner">Inner exception that caused this issue.</param>
        Public Sub New(ByVal message As String, ByVal inner As Exception)
            MyBase.New(message, inner)
        End Sub
 
        ''' <summary>
        ''' Constructs a new exception with the given serialization information.
        ''' </summary>
        ''' <param name="info"></param>
        ''' <param name="context"></param>
        Protected Sub New(ByVal info As System.Runtime.Serialization.SerializationInfo, ByVal context As System.Runtime.Serialization.StreamingContext)
            MyBase.New(info, context)
        End Sub
 
    End Class
 
End Namespace
by Utkarsh Puranik
  February 23, 2010 @ 3:43am
Tags:
Description:
This class can be used for converting a csv file into datatable without losing commas in values.

Add a comment


Report Abuse
brought to you by:
West Wind Techologies



If you find this site useful and use it frequently please consider making a donation to support this free service.
Donate