Skip to content

Commit

Permalink
Merge pull request #1 from Clever/birthday
Browse files Browse the repository at this point in the history
first commit
  • Loading branch information
azylman committed Nov 3, 2014
2 parents 0089969 + 3bafc97 commit be8a7b0
Show file tree
Hide file tree
Showing 12 changed files with 379 additions and 3 deletions.
24 changes: 24 additions & 0 deletions .drone.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
image: bradrydzewski/go:1.3
script:
- make test
notify:
email:
recipients:
- [email protected]
hipchat:
room: Clever-Dev-CI
token: {{hipchat_token}}
on_started: true
on_success: true
on_failure: true
publish:
github:
branch: master
script:
- make release
artifacts:
- release
tag: v$(cat VERSION)
token: {{github_token}}
user: Clever
repo: csvlint
54 changes: 54 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
SHELL := /bin/bash
PKG = github.com/Clever/csvlint
PKGS = $(PKG)
VERSION := $(shell cat VERSION)
EXECUTABLE := csvlint
BUILDS := \
build/$(EXECUTABLE)-v$(VERSION)-darwin-amd64 \
build/$(EXECUTABLE)-v$(VERSION)-linux-amd64 \
build/$(EXECUTABLE)-v$(VERSION)-windows-amd64
COMPRESSED_BUILDS := $(BUILDS:%=%.tar.gz)
RELEASE_ARTIFACTS := $(COMPRESSED_BUILDS:build/%=release/%)

# None of these targets produce a file of the same name. `build` and `release`
# additionally collide with directories created by the recipes below, so they
# must be phony for make to re-evaluate their prerequisites every time.
.PHONY: test golint run build release clean

golint:
@go get github.com/golang/lint/golint

test: $(PKGS)

$(PKGS): golint
@go get -d -t $@
@gofmt -w=true $(GOPATH)/src/$@*/**.go
ifneq ($(NOLINT),1)
@echo "LINTING..."
@PATH=$(PATH):$(GOPATH)/bin golint $(GOPATH)/src/$@*/**.go
@echo ""
endif
ifeq ($(COVERAGE),1)
@go test -cover -coverprofile=$(GOPATH)/src/$@/c.out $@ -test.v
@go tool cover -html=$(GOPATH)/src/$@/c.out
else
@echo "TESTING..."
@go test $@ -test.v
endif

run:
@go run cmd/csvlint/main.go

build/$(EXECUTABLE)-v$(VERSION)-darwin-amd64:
GOARCH=amd64 GOOS=darwin go build -o "$@/$(EXECUTABLE)" $(PKG)/cmd/csvlint
build/$(EXECUTABLE)-v$(VERSION)-linux-amd64:
GOARCH=amd64 GOOS=linux go build -o "$@/$(EXECUTABLE)" $(PKG)/cmd/csvlint
build/$(EXECUTABLE)-v$(VERSION)-windows-amd64:
GOARCH=amd64 GOOS=windows go build -o "$@/$(EXECUTABLE).exe" $(PKG)/cmd/csvlint
build: $(BUILDS)
%.tar.gz: %
tar -C `dirname $<` -zcvf "$<.tar.gz" `basename $<`
$(RELEASE_ARTIFACTS): release/% : build/%
mkdir -p release
cp $< $@
release: $(RELEASE_ARTIFACTS)

clean:
rm -rf build release
65 changes: 62 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,63 @@
csv-checker
===========
# csvlint

command line utility that takes in a CSV and reports if it's a valid CSV, reporting any errors and their line numbers if not
`csvlint` is a library and command-line utility for linting CSV files according to [RFC 4180](http://tools.ietf.org/html/rfc4180).

It assumes that your CSV file has an initial header row.

Everything in this README file refers to the command-line utility.
For information about the library, see [godoc](http://godoc.org/github.com/Clever/csvlint).

## Installing

Standalone executables for multiple platforms are available via [GitHub Releases](https://github.com/Clever/csvlint/releases).

You can also compile from source:

```shell
go get github.com/Clever/csvlint/cmd/csvlint
```

## Usage

`csvlint [options] /path/to/csv/file`

### Options

* delimiter: the field delimiter to use
* default: comma
* valid options: comma, tab
* if you want anything else, you're probably doing CSVs wrong
* lazyquotes: allow a quote to appear in an unquoted field and a non-doubled quote to appear in a quoted field. _WARNING: your file may pass linting, but not parse in the way you would expect_

### Examples

```shell
$ csvlint bad_quote.csv
Record #1 has error: bare " in non-quoted-field

unable to parse any further

$ csvlint --lazyquotes bad_quote.csv
file is valid

$ csvlint mult_long_columns.csv
Record #2 has error: wrong number of fields in line
Record #4 has error: wrong number of fields in line

$ csvlint --delimiter=tab mult_long_columns_tabs.csv
Record #2 has error: wrong number of fields in line
Record #4 has error: wrong number of fields in line

$ csvlint one_long_column.csv
Record #2 has error: wrong number of fields in line

$ csvlint perfect.csv
file is valid
```

### Exit codes

`csvlint` uses three different exit codes to mean different things:
* 0 - the file is valid
* 1 - couldn't parse the entire file
* 2 - could parse the file, but there were lint failures
1 change: 1 addition & 0 deletions VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
68 changes: 68 additions & 0 deletions cmd/csvlint/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package main

import (
"flag"
"fmt"
"github.com/Clever/csvlint"
"os"
)

// printHelpAndExit prints the usage text of every defined command-line flag
// and then terminates the process with the supplied exit code.
func printHelpAndExit(code int) {
	flag.CommandLine.PrintDefaults()
	os.Exit(code)
}

// main parses the command-line flags, lints the single CSV file named on the
// command line and reports the outcome through the process exit code:
//
//	0 - the file is valid (or -help was requested)
//	1 - bad usage, the file could not be opened or read, or parsing halted early
//	2 - the whole file was parsed but some records failed linting
func main() {
	delimiter := flag.String("delimiter", "comma", "field delimiter in the file. options: comma, tab")
	lazyquotes := flag.Bool("lazyquotes", false, "try to parse improperly escaped quotes")
	help := flag.Bool("help", false, "print help and exit")
	flag.Parse()

	if *help {
		printHelpAndExit(0)
	}

	// Translate the human-friendly delimiter name into the rune the linter expects.
	var comma rune
	switch *delimiter {
	case "comma":
		comma = ','
	case "tab":
		comma = '\t'
	default:
		fmt.Printf("unrecognized delimiter '%s'\n\n", *delimiter)
		printHelpAndExit(1)
	}

	if flag.NArg() != 1 {
		// Print with an explicit blank line produces the same output as the
		// former Println("...\n") without the redundant-newline construct
		// that go vet flags.
		fmt.Print("csvlint accepts a single filepath as an argument\n\n")
		printHelpAndExit(1)
	}

	os.Exit(lintFile(flag.Arg(0), comma, *lazyquotes))
}

// lintFile validates the CSV file at path and prints the findings to standard
// output. It returns the exit code the process should terminate with, so that
// the deferred Close runs before main calls os.Exit.
func lintFile(path string, comma rune, lazyquotes bool) int {
	f, err := os.Open(path)
	if err != nil {
		if os.IsNotExist(err) {
			fmt.Printf("file '%s' does not exist\n", path)
		} else {
			// Report instead of panicking: a panic terminates the process with
			// status 2, which callers would mistake for "lint failures".
			fmt.Printf("unable to open file '%s': %s\n", path, err)
		}
		return 1
	}
	defer f.Close()

	invalids, halted, err := csvlint.Validate(f, comma, lazyquotes)
	if err != nil {
		// Same reasoning as above: keep exit status 2 reserved for lint failures.
		fmt.Printf("unable to read file '%s': %s\n", path, err)
		return 1
	}
	if len(invalids) == 0 {
		fmt.Println("file is valid")
		return 0
	}
	for _, invalid := range invalids {
		fmt.Println(invalid.Error())
	}
	if halted {
		fmt.Println("\nunable to parse any further")
		return 1
	}
	return 2
}
67 changes: 67 additions & 0 deletions linter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package csvlint

import (
"encoding/csv"
"fmt"
"io"
)

// CSVError describes a single invalid record found in a CSV file.
type CSVError struct {
	// Record is the invalid record. This will be nil when we were unable to parse a record.
	Record []string
	// Num is the record number of this record.
	Num int
	// err is the underlying reason the record was rejected. It is unexported,
	// so values built outside this package always carry a nil err.
	err error
}

// Error implements the error interface. The message has the form
// "Record #<Num> has error: <reason>". When no underlying error is set, the
// reason is reported as "unknown error" instead of dereferencing nil.
func (e CSVError) Error() string {
	if e.err == nil {
		return fmt.Sprintf("Record #%d has error: unknown error", e.Num)
	}
	return fmt.Sprintf("Record #%d has error: %s", e.Num, e.err.Error())
}

// Unwrap returns the underlying error (nil if none is set), which lets callers
// inspect the cause with errors.Is / errors.As.
func (e CSVError) Unwrap() error {
	return e.err
}

// Validate tests whether or not a CSV lints according to RFC 4180.
// The lazyquotes option will attempt to parse lines that aren't quoted properly.
//
// The first record read is treated as the header; every later record must have
// the same number of fields as the header. Records are numbered from 1,
// starting with the first record after the header, so a parse failure in the
// header itself is reported with Num 0.
//
// It returns:
//   - the invalid records found so far (always a non-nil slice),
//   - whether linting halted before reaching the end of the input,
//   - a non-nil error only when reading failed for a reason other than a CSV
//     parse error; parse errors are reported as a CSVError with a nil Record.
func Validate(reader io.Reader, delimiter rune, lazyquotes bool) ([]CSVError, bool, error) {
	r := csv.NewReader(reader)
	// Turn off the reader's own field-count check: records are compared with
	// the header below so that linting continues past a mismatching record.
	r.FieldsPerRecord = -1
	r.LazyQuotes = lazyquotes
	r.Comma = delimiter

	var header []string
	// Deliberately an empty, non-nil slice: that is what callers receive for a
	// valid file.
	invalids := []CSVError{}
	records := 0
	for {
		record, err := r.Read()
		if header != nil {
			records++
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			parseErr, ok := err.(*csv.ParseError)
			if !ok {
				// Not a CSV syntax problem (e.g. the reader failed); pass it on.
				return invalids, true, err
			}
			// The reader cannot recover from a syntax error, so stop here.
			invalids = append(invalids, CSVError{
				Record: nil,
				Num:    records,
				err:    parseErr.Err,
			})
			return invalids, true, nil
		}
		if header == nil {
			header = record
			continue
		}
		if len(record) != len(header) {
			invalids = append(invalids, CSVError{
				Record: record,
				Num:    records,
				err:    csv.ErrFieldCount,
			})
		}
	}
	return invalids, false, nil
}
81 changes: 81 additions & 0 deletions linter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package csvlint

import (
"encoding/csv"
"github.com/stretchr/testify/assert"
"os"
"testing"
)

// validationTable lists the CSV fixtures exercised by TestTable together with
// the results Validate is expected to return for each of them.
var validationTable = []struct {
	// file is the path of the fixture to validate.
	file string
	// comma is the field delimiter; when left at its zero value TestTable
	// substitutes ','.
	comma rune
	// err is the expected error result.
	err error
	// halted is the expected halted flag.
	halted bool
	// invalids is the expected list of invalid records.
	invalids []CSVError
}{
	{
		file:     "./test_data/perfect.csv",
		invalids: []CSVError{},
	},
	{
		file: "./test_data/one_long_column.csv",
		invalids: []CSVError{
			{Num: 2, Record: []string{"d", "e", "f", "g"}, err: csv.ErrFieldCount},
		},
	},
	{
		file: "./test_data/mult_long_columns.csv",
		invalids: []CSVError{
			{Num: 2, Record: []string{"d", "e", "f", "g"}, err: csv.ErrFieldCount},
			{Num: 4, Record: []string{"k", "l", "m", "n"}, err: csv.ErrFieldCount},
		},
	},
	{
		file:  "./test_data/mult_long_columns_tabs.csv",
		comma: '\t',
		invalids: []CSVError{
			{Num: 2, Record: []string{"d", "e", "f", "g"}, err: csv.ErrFieldCount},
			{Num: 4, Record: []string{"k", "l", "m", "n"}, err: csv.ErrFieldCount},
		},
	},
}

// TestTable runs Validate (with lazy quotes disabled) against every fixture in
// validationTable and compares the returned error, halted flag and invalid
// records with the expectations recorded in the table.
func TestTable(t *testing.T) {
	for _, test := range validationTable {
		f, err := os.Open(test.file)
		if !assert.Nil(t, err) {
			// Nothing meaningful to validate when the fixture cannot be opened.
			continue
		}
		comma := test.comma
		if comma == 0 {
			// Table entries that do not set a delimiter use a comma.
			comma = ','
		}
		invalids, halted, err := Validate(f, comma, false)
		// Close immediately instead of deferring: a defer inside the loop
		// would keep every fixture open until the whole test returns.
		f.Close()
		// testify expects the arguments as (expected, actual).
		assert.Equal(t, test.err, err)
		assert.Equal(t, test.halted, halted)
		assert.Equal(t, test.invalids, invalids)
	}
}

// errTable pairs errors with the exact text their Error method is expected to
// produce; it is consumed by TestErrors.
var errTable = []struct {
	// message is the expected result of err.Error().
	message string
	// err is the error under test.
	err error
}{
	{
		message: "Record #3 has error: wrong number of fields in line",
		err: CSVError{
			Num:    3,
			Record: []string{"a", "b", "c"},
			err:    csv.ErrFieldCount,
		},
	},
	{
		message: `Record #1 has error: bare " in non-quoted-field`,
		err: CSVError{
			Num:    1,
			Record: []string{"d", "e", "f"},
			err:    csv.ErrBareQuote,
		},
	},
}

// TestErrors checks that every error in errTable renders exactly the message
// recorded next to it.
func TestErrors(t *testing.T) {
	for _, test := range errTable {
		// testify expects (expected, actual); passing them in that order keeps
		// failure output labelled correctly.
		assert.Equal(t, test.message, test.err.Error())
	}
}
3 changes: 3 additions & 0 deletions test_data/bad_quote.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
field1,field2,field3
john "the rock" smith,a,b
c,d,e
6 changes: 6 additions & 0 deletions test_data/mult_long_columns.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
field1,field2,field3
a,b,c
d,e,f,g
h,i,j
k,l,m,n
o,p,q
6 changes: 6 additions & 0 deletions test_data/mult_long_columns_tabs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
field1 field2 field3
a b c
d e f g
h i j
k l m n
o p q
4 changes: 4 additions & 0 deletions test_data/one_long_column.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
field1,field2,field3
a,b,c
d,e,f,g
h,i,j
3 changes: 3 additions & 0 deletions test_data/perfect.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
field1,field2,field3
a,b,c
d,e,f

0 comments on commit be8a7b0

Please sign in to comment.