From 917b473fa93a225d62e0d6dc18b044d088908e61 Mon Sep 17 00:00:00 2001 From: nxshock Date: Mon, 3 Apr 2023 19:12:30 +0500 Subject: [PATCH] Initial commit --- .gitattributes | 2 + .gitignore | 21 ++ LICENSE | 21 ++ README.md | 2 + app.go | 209 ++++++++++++++++++ archivetypes.go | 50 +++++ charsets.go | 66 ++++++ fieldtypes.go | 137 ++++++++++++ filetypes.go | 63 ++++++ go.mod | 30 +++ go.sum | 104 +++++++++ logger.go | 21 ++ main.go | 17 ++ make.bat | 1 + options.go | 62 ++++++ reader.go | 28 +++ readercsv.go | 101 +++++++++ readercsv_test.go | 42 ++++ readerdbf.go | 124 +++++++++++ readerdbf_test.go | 38 ++++ readerxlsx.go | 159 +++++++++++++ readerxlsx_test.go | 36 +++ sql.go | 151 +++++++++++++ .../csv/9729337841_20032023_084313667.csv | 5 + testdata/dbf/38_052QB.dbf | Bin 0 -> 599 bytes testdata/xlsx/38_049RMZ_all.xlsx | Bin 0 -> 9124 bytes zipreader.go | 33 +++ 27 files changed, 1523 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 app.go create mode 100644 archivetypes.go create mode 100644 charsets.go create mode 100644 fieldtypes.go create mode 100644 filetypes.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 logger.go create mode 100644 main.go create mode 100644 make.bat create mode 100644 options.go create mode 100644 reader.go create mode 100644 readercsv.go create mode 100644 readercsv_test.go create mode 100644 readerdbf.go create mode 100644 readerdbf_test.go create mode 100644 readerxlsx.go create mode 100644 readerxlsx_test.go create mode 100644 sql.go create mode 100644 testdata/csv/9729337841_20032023_084313667.csv create mode 100644 testdata/dbf/38_052QB.dbf create mode 100644 testdata/xlsx/38_049RMZ_all.xlsx create mode 100644 zipreader.go diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF 
normalization +* text=auto diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3b735ec --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a626811 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 nxshock + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5b57224
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# mssqlbulkloader
+
diff --git a/app.go b/app.go
new file mode 100644
index 0000000..e78bb83
--- /dev/null
+++ b/app.go
@@ -0,0 +1,234 @@
+package main
+
+import (
+	"database/sql"
+	"fmt"
+	"io"
+	"net/url"
+	"os"
+	"time"
+	_ "time/tzdata"
+
+	_ "github.com/denisenkom/go-mssqldb"
+	"github.com/urfave/cli"
+)
+
+// app is the command-line application: its flags describe the input file, the
+// target table and the parsing options; Action turns them into Options and runs the load.
+var app = &cli.App{
+	Version: "2023.03.27",
+	Usage: "bulk loader into Microsoft SQL Server",
+	HideHelp: true,
+	Flags: []cli.Flag{
+		&cli.StringFlag{
+			Name: "filepath",
+			Usage: "input file path",
+			Required: true,
+			TakesFile: true},
+		&cli.StringFlag{
+			Name: "type",
+			Usage: "input file type",
+			Required: false,
+			Value: "auto",
+		},
+		&cli.StringFlag{
+			Name: "encoding",
+			Usage: "input file encoding",
+			Required: false,
+			Value: "utf8",
+		},
+		&cli.StringFlag{
+			Name: "sheetname",
+			Usage: "Excel file sheet name",
+			Required: false},
+		&cli.StringFlag{
+			Name: "server",
+			Usage: "database server address",
+			Value: "127.0.0.1"},
+		&cli.StringFlag{
+			Name: "database",
+			Usage: "database name",
+			Required: true},
+		&cli.StringFlag{
+			Name: "table",
+			Usage: "table name in schema.name format",
+			Required: true},
+		&cli.StringFlag{
+			Name: "fields",
+			Usage: "list of field types in [sifdt ]+ format",
+			Required: true},
+		&cli.BoolFlag{
+			Name: "create",
+			Usage: "create table"},
+		&cli.BoolFlag{
+			Name: "overwrite",
+			Usage: "overwrite existing table"},
+		&cli.IntFlag{
+			Name: "skiprows",
+			Usage: "number of rows to skip before read header"},
+		&cli.BoolFlag{
+			Name: "unknowncolumnnames",
+			Usage: "insert to table with unknown column names",
+		},
+		&cli.StringFlag{
+			Name: "timezone",
+			Usage: "Time zone (IANA Time Zone database format)",
+			Value: "Local",
+		},
+		&cli.StringFlag{
+			Name: "comma",
+			Usage: "CSV file delimiter",
+			Value: ",",
+		},
+		&cli.StringFlag{
+			Name: "dateformat",
+			Usage: "date format (Go style)",
+			Value: "02.01.2006"},
+		&cli.StringFlag{
+			Name: "timestampformat",
+			Usage: "timestamp format (Go style)",
+			Value: "02.01.2006 15:04:05"},
+		&cli.StringFlag{
+			Name: "decompress",
+			Usage: "decompressor name for archived files",
+		},
+		&cli.BoolFlag{
+			Name: "silent",
+			Usage: "disable output",
+		},
+	},
+	Action: func(c *cli.Context) error {
+		initLogger(c.Bool("silent"))
+
+		// An empty delimiter must not panic and a multi-byte delimiter must not
+		// be cut to its first byte, so the conversion is done rune-wise.
+		comma, err := parseComma(c.String("comma"))
+		if err != nil {
+			return fmt.Errorf("parse comma: %w", err)
+		}
+
+		location, err := time.LoadLocation(c.String("timezone"))
+		if err != nil {
+			return fmt.Errorf("parse timezone: %w", err)
+		}
+
+		options := &Options{
+			filePath: c.String("filepath"),
+			fileType: c.String("type"),
+			sheetName: c.String("sheetname"),
+			server: c.String("server"),
+			database: c.String("database"),
+			tableName: c.String("table"),
+			fieldsTypes: c.String("fields"),
+			create: c.Bool("create"),
+			overwrite: c.Bool("overwrite"),
+			skipRows: c.Int("skiprows"),
+			encoding: c.String("encoding"),
+			dateFormat: c.String("dateformat"),
+			timestampFormat: c.String("timestampformat"),
+			timezone: location,
+			decompress: c.String("decompress"),
+			unknownColumnNames: c.Bool("unknowncolumnnames"),
+			silent: c.Bool("silent"),
+			comma: comma,
+		}
+
+		if options.decompress != "" {
+			var archiveType ArchiveType
+			err = archiveType.UnmarshalText([]byte(options.decompress))
+			if err != nil {
+				return err
+			}
+
+			ar, err := archiveType.Open()
+			if err != nil {
+				return err
+			}
+
+			err = ar.Process(options)
+			if err != nil {
+				return err
+			}
+		} else {
+			f, err := os.Open(options.filePath)
+			if err != nil {
+				return err
+			}
+			defer f.Close()
+
+			err = process(f, options)
+			if err != nil {
+				return err
+			}
+		}
+
+		logger.Print("Complete.")
+
+		return nil
+	}}
+
+// parseComma converts the --comma flag value into the CSV delimiter rune.
+// The two-character sequence `\t` selects a tab; otherwise the first
+// character of s is used. An empty value is reported as an error.
+func parseComma(s string) (rune, error) {
+	if s == "\\t" {
+		return '\t', nil
+	}
+
+	runes := []rune(s)
+	if len(runes) == 0 {
+		return 0, fmt.Errorf("delimiter must not be empty")
+	}
+
+	return runes[0], nil
+}
+
+// process reads the input from r with the reader selected by
+// options.fileType and loads it into the database inside one transaction;
+// a failure while preparing the table or inserting rows rolls it back.
+func process(r io.Reader, options *Options) error {
+	var fileType FileType
+	err := fileType.UnmarshalText([]byte(options.fileType))
+	if err != nil {
+		return err
+	}
+
+	reader, err := fileType.Open(r, options)
+	if err != nil {
+		return err
+	}
+	defer reader.Close()
+
+	// The database name is query-escaped so that names containing spaces, '&'
+	// or other reserved characters do not corrupt the connection URL.
+	dsn := fmt.Sprintf("sqlserver://%s?database=%s", options.server, url.QueryEscape(options.database))
+	db, err := sql.Open("sqlserver", dsn)
+	if err != nil {
+		return fmt.Errorf("open database: %w", err)
+	}
+	defer db.Close()
+
+	tx, err := db.Begin()
+	if err != nil {
+		return fmt.Errorf("begin transaction: %w", err)
+	}
+
+	err = prepareTable(reader, tx)
+	if err != nil {
+		_ = tx.Rollback() // best effort: the error that caused the rollback is the one reported
+		return fmt.Errorf("prepare table: %w", err)
+	}
+
+	err = insertData(reader, tx)
+	if err != nil {
+		_ = tx.Rollback() // best effort: the error that caused the rollback is the one reported
+		return fmt.Errorf("insert data: %w", err)
+	}
+
+	err = tx.Commit()
+	if err != nil {
+		return fmt.Errorf("commit transaction: %w", err)
+	}
+
+	return nil
+}
diff --git a/archivetypes.go b/archivetypes.go
new file mode 100644
index 0000000..00ffa26
--- /dev/null
+++ b/archivetypes.go
@@ -0,0 +1,60 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+)
+
+// ArchiveType identifies the archive format of a compressed input file.
+type ArchiveType int
+
+const (
+	AutoDetectArchiveType ArchiveType = iota
+	Zip
+)
+
+// ArchiveProcessor is implemented by archive readers such as ZipReader.
+// Process receives the full run options.
+type ArchiveProcessor interface {
+	Process(options *Options) error
+}
+
+// MarshalText returns the textual code of the archive type ("auto", "zip").
+func (ft ArchiveType) MarshalText() (text []byte, err error) {
+	switch ft {
+	case AutoDetectArchiveType:
+		return []byte("auto"), nil
+	case Zip:
+		return []byte("zip"), nil
+	}
+
+	return nil, fmt.Errorf("unknown type id = %d", ft)
+}
+
+// Open returns the processor for the archive type. Auto-detection has no
+// implementation, so it is reported explicitly instead of falling through
+// to the misleading "unknown type id = 0" error.
+func (ft ArchiveType) Open() (ArchiveProcessor, error) {
+	switch ft {
+	case AutoDetectArchiveType:
+		return nil, errors.New("archive type auto-detection is not supported, specify the archive type explicitly")
+	case Zip:
+		return new(ZipReader), nil
+	}
+
+	return nil, fmt.Errorf("unknown type id = %d", ft)
+}
+
+// UnmarshalText parses the textual code produced by MarshalText.
+func (ft *ArchiveType) UnmarshalText(text []byte) error {
+	switch string(text) {
+	case "auto":
+		*ft = AutoDetectArchiveType
+		return nil
+	case "zip":
+		*ft = Zip
+		return nil
+	}
+
+	return fmt.Errorf(`unknown format code "%s"`, string(text))
+}
diff --git a/charsets.go b/charsets.go
new file mode 100644
index 0000000..e359376
--- /dev/null
+++ b/charsets.go
@@ -0,0 +1,66 @@
+package main
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/dimchansky/utfbom"
+	"golang.org/x/text/encoding/charmap"
+)
+
+type 
Charset interface {
+	String(string) (string, error)
+	Reader(io.Reader) io.Reader
+}
+
+type Charsets map[string]Charset
+
+var charsets = make(Charsets)
+
+func (c Charsets) Register(name string, charset Charset) {
+	c[name] = charset
+}
+
+func (c Charsets) DecodeString(name string, input string) (string, error) {
+	decoder, ok := c[name]
+	if !ok {
+		return "", fmt.Errorf("unknown decoder: %s", name)
+	}
+
+	if decoder == nil {
+		return input, nil
+	}
+
+	return decoder.String(input)
+}
+
+func (c Charsets) DecodeReader(name string, input io.Reader) (io.Reader, error) {
+	decoder, ok := c[name] // look up in the receiver, like DecodeString, not in the package-level map
+	if !ok {
+		return nil, fmt.Errorf("unknown decoder: %s", name)
+	}
+
+	if decoder == nil {
+		return input, nil
+	}
+
+	return decoder.Reader(input), nil
+}
+
+func init() {
+	charsets.Register("utf8", utf8decoder)
+	charsets.Register("win1251", charmap.Windows1251.NewDecoder())
+	charsets.Register("cp866", charmap.CodePage866.NewDecoder())
+}
+
+type Utf8decoder struct{}
+
+var utf8decoder = new(Utf8decoder)
+
+func (d *Utf8decoder) Reader(r io.Reader) io.Reader {
+	return utfbom.SkipOnly(r)
+}
+
+func (d *Utf8decoder) String(s string) (string, error) {
+	return s, nil
+}
diff --git a/fieldtypes.go b/fieldtypes.go
new file mode 100644
index 0000000..c816c25
--- /dev/null
+++ b/fieldtypes.go
@@ -0,0 +1,156 @@
+package main
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// CustomDateParser is implemented by readers that parse Date values
+// themselves instead of using the configured dateFormat.
+type CustomDateParser interface {
+	Reader
+	ParseDate(rawValue string) (time.Time, error)
+}
+
+// CustomDateTimeParser is implemented by readers that parse Timestamp values
+// themselves instead of using the configured timestampFormat.
+type CustomDateTimeParser interface {
+	Reader
+	ParseDateTime(rawValue string) (time.Time, error)
+}
+
+// FieldType is the type of a single input column, written as one character
+// per column in the fields list (see UnmarshalText).
+type FieldType int
+
+const (
+	Skip FieldType = iota
+	Integer
+	String
+	Float
+	Money
+	Date
+	Timestamp
+)
+
+// ParseValue converts the raw cell text s into a Go value matching ft.
+// Surrounding whitespace is ignored and an empty cell yields nil.
+// Readers implementing CustomDateParser / CustomDateTimeParser take over date
+// parsing; otherwise the formats and timezone from reader.Options() are used.
+// Skip and Money have no parser here and produce an "unknown type id" error.
+func (ft FieldType) ParseValue(reader Reader, s string) (any, error) {
+	s = strings.TrimSpace(s)
+
+	if s == "" {
+		return nil, nil
+	}
+
+	switch ft {
+	case String:
+		return s, nil
+	case Integer:
+		return strconv.ParseInt(s, 10, 64)
+	case Float:
+		// Accept a decimal comma as well as a decimal point.
+		return strconv.ParseFloat(strings.ReplaceAll(s, ",", "."), 64)
+	case Date:
+		if i, ok := reader.(CustomDateParser); ok {
+			t, err := i.ParseDate(s)
+			if err != nil {
+				return nil, err
+			}
+			return /*t.Truncate(24 * time.Hour)*/ t, nil // TODO: check whether Truncate is needed
+		}
+
+		return time.ParseInLocation(reader.Options().dateFormat, s, reader.Options().timezone)
+	case Timestamp:
+		if i, ok := reader.(CustomDateTimeParser); ok {
+			t, err := i.ParseDateTime(s)
+			if err != nil {
+				return nil, err
+			}
+			// Drop only the sub-second part. Truncating to 24*time.Second
+			// rounded every timestamp down to a 24-second boundary.
+			return t.Truncate(time.Second), nil
+		}
+
+		return time.ParseInLocation(reader.Options().timestampFormat, s, reader.Options().timezone)
+	}
+
+	return nil, fmt.Errorf("unknown type id = %d", ft)
+}
+
+// SqlFieldType returns the SQL column type used for ft, or "" for types
+// without one (Skip). Money is not supported and panics.
+func (ft FieldType) SqlFieldType() string {
+	switch ft {
+	case Integer:
+		return "bigint"
+	case String:
+		return "nvarchar(255)"
+	case Float:
+		return "float"
+	case Money:
+		panic("do not implemented - see https://github.com/denisenkom/go-mssqldb/issues/460") // TODO: https://github.com/denisenkom/go-mssqldb/issues/460
+	case Date:
+		return "date"
+	case Timestamp:
+		return "datetime2"
+	}
+
+	return ""
+}
+
+// MarshalText returns the one-character code of ft.
+func (ft FieldType) MarshalText() (text []byte, err error) {
+	switch ft {
+	case Skip:
+		return []byte(" "), nil
+	case Integer:
+		return []byte("i"), nil
+	case String:
+		return []byte("s"), nil
+	case Float:
+		return []byte("f"), nil
+	case Money:
+		return []byte("m"), nil
+	case Date:
+		return []byte("d"), nil
+	case Timestamp:
+		return []byte("t"), nil
+	}
+
+	return nil, fmt.Errorf("unknown type id = %d", ft)
+}
+
+// UnmarshalText parses a one-character code: ' ' skip, 'i' integer,
+// 's' string, 'f' float, 'm' money, 'd' date, 't' timestamp.
+func (ft *FieldType) UnmarshalText(text []byte) error {
+	switch string(text) {
+	case " ":
+		*ft = Skip
+		return nil
+	case "i":
+		*ft = Integer
+		return nil
+	case "s":
+		*ft = String
+		return nil
+	case "f":
+		*ft = Float
+		return nil
+	case "m":
+		*ft = Money
+		return nil
+	case "d":
+		*ft = Date
+		return nil
+	case "t":
+		*ft = Timestamp
+		return nil
+	}
+
+	return fmt.Errorf(`unknown format code "%s"`, string(text))
+}
diff --git 
a/filetypes.go b/filetypes.go
new file mode 100644
index 0000000..54e1b6b
--- /dev/null
+++ b/filetypes.go
@@ -0,0 +1,71 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+)
+
+// FileType identifies the format of the input file.
+type FileType int
+
+const (
+	AutoDetect FileType = iota
+	Csv
+	Xlsx
+	Dbf
+)
+
+// MarshalText returns the textual code of the file type ("auto", "csv", "xlsx", "dbf").
+func (ft FileType) MarshalText() (text []byte, err error) {
+	switch ft {
+	case AutoDetect:
+		return []byte("auto"), nil
+	case Csv:
+		return []byte("csv"), nil
+	case Xlsx:
+		return []byte("xlsx"), nil
+	case Dbf:
+		return []byte("dbf"), nil
+	}
+
+	return nil, fmt.Errorf("unknown type id = %d", ft)
+}
+
+// Open creates the Reader for the file type on top of r. Auto-detection has no
+// implementation, so it is reported explicitly instead of falling through
+// to the misleading "unknown type id = 0" error.
+func (ft FileType) Open(r io.Reader, options *Options) (Reader, error) {
+	switch ft {
+	case AutoDetect:
+		return nil, errors.New("file type auto-detection is not supported, specify the file type explicitly")
+	case Csv:
+		return newCsvReader(r, options)
+	case Xlsx:
+		return newXlsxReader(r, options)
+	case Dbf:
+		return newDbfReader(r, options)
+	}
+
+	return nil, fmt.Errorf("unknown type id = %d", ft)
+}
+
+// UnmarshalText parses the textual code produced by MarshalText.
+func (ft *FileType) UnmarshalText(text []byte) error {
+	switch string(text) {
+	case "auto":
+		*ft = AutoDetect
+		return nil
+	case "csv":
+		*ft = Csv
+		return nil
+	case "xlsx":
+		*ft = Xlsx
+		return nil
+	case "dbf":
+		*ft = Dbf
+		return nil
+	}
+
+	return fmt.Errorf(`unknown format code "%s"`, string(text))
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..212351d
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,30 @@
+module github.com/nxshock/mssqlbulkloader
+
+go 1.20
+
+require (
+	github.com/SebastiaanKlippert/go-foxpro-dbf v1.2.0
+	github.com/denisenkom/go-mssqldb v0.12.3
+	github.com/dimchansky/utfbom v1.1.1
+	github.com/stretchr/testify v1.8.2
+	github.com/urfave/cli v1.22.12
+	github.com/xuri/excelize/v2 v2.7.0
+	golang.org/x/text v0.8.0
+)
+
+require (
+	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 // indirect
+	github.com/golang-sql/sqlexp v0.1.0 // indirect
+	github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
+	github.com/pmezard/go-difflib 
v1.0.0 // indirect + github.com/richardlehane/mscfb v1.0.4 // indirect + github.com/richardlehane/msoleps v1.0.3 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 // indirect + github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 // indirect + golang.org/x/crypto v0.7.0 // indirect + golang.org/x/net v0.8.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ef0e636 --- /dev/null +++ b/go.sum @@ -0,0 +1,104 @@ +github.com/Azure/azure-sdk-for-go/sdk/azcore v0.19.0/go.mod h1:h6H6c8enJmmocHUbLiiGY6sx7f9i+X3m1CHdd5c6Rdw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v0.11.0/go.mod h1:HcM1YX14R7CJcghJGOYCgdezslRSVzqwLf/q+4Y2r/0= +github.com/Azure/azure-sdk-for-go/sdk/internal v0.7.0/go.mod h1:yqy467j36fJxcRV2TzfVZ1pCb5vxm4BtZPUdYWe/Xo8= +github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/SebastiaanKlippert/go-foxpro-dbf v1.2.0 h1:11OnIKzaY952Atj9pLewuG09DdRv6CCm2XnZaTEcWn0= +github.com/SebastiaanKlippert/go-foxpro-dbf v1.2.0/go.mod h1:VnyVS1nyFfnCduBoWvjYuRp5Ce3KqZThky+ECDvJmEA= +github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/denisenkom/go-mssqldb v0.12.3 h1:pBSGx9Tq67pBOTLmxNuirNTeB8Vjmf886Kx+8Y+8shw= +github.com/denisenkom/go-mssqldb v0.12.3/go.mod h1:k0mtMFOnU+AihqFxPMiF05rtiDrorD1Vrm1KEz5hxDo= +github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U= +github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE= 
+github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= +github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= +github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA= +github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= +github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= +github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI= +github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5/go.mod h1:caMODM3PzxT8aQXRPkAt8xlV/e7d7w8GM5g0fa5F0D8= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4/go.mod h1:4OwLy04Bl9Ef3GJJCoec+30X3LQs/0/m4HFRt/2LUSA= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/richardlehane/mscfb v1.0.4 h1:WULscsljNPConisD5hR0+OyZjwK46Pfyr6mPu5ZawpM= +github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk= +github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN0AQoVM= +github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/urfave/cli v1.22.12 h1:igJgVw1JdKH+trcLWLeLwZjU9fEfPesQ+9/e4MQ44S8= +github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 h1:6932x8ltq1w4utjmfMPVj09jdMlkY0aiA6+Skbtl3/c= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI= +github.com/xuri/excelize/v2 v2.7.0 h1:Hri/czwyRCW6f6zrCDWXcXKshlq4xAZNpNOpdfnFhEw= +github.com/xuri/excelize/v2 v2.7.0/go.mod h1:ebKlRoS+rGyLMyUx3ErBECXs/HNYqyj+PbkkKRK5vSI= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 h1:OAmKAfT06//esDdpi/DZ8Qsdt4+M5+ltca05dA5bG2M= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod 
h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU= +golang.org/x/crypto v0.7.0 h1:AvwMYaRytfdeVt3u6mLaxYtErKYjxA2OXjJ1HHq6t3A= +golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69 h1:Lj6HJGCSn5AjxRAH2+r35Mir4icalbqku+CLUtjnvXY= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= +golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= +golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= +golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 
h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/logger.go b/logger.go
new file mode 100644
index 0000000..23dc2b8
--- /dev/null
+++ b/logger.go
@@ -0,0 +1,27 @@
+package main
+
+import (
+	"io"
+	"log"
+	"os"
+)
+
+// Logger holds logging settings.
+// NOTE(review): appears unused here; logging goes through the logger variable - confirm before removing.
+type Logger struct {
+	silent bool
+}
+
+// logger is the process-wide logger. It starts out writing to stderr so that
+// code running before initLogger (tests, for example) never hits a nil logger.
+var logger = log.New(os.Stderr, "", 0)
+
+// initLogger points logger at stderr, or discards all output when silent.
+func initLogger(silent bool) {
+	if silent {
+		logger = log.New(io.Discard, "", 0)
+		return
+	}
+
+	logger = log.New(os.Stderr, "", 0)
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..e1cb2f3
--- /dev/null
+++ b/main.go
@@ -0,0 +1,17 @@
+package main
+
+import (
+	"log"
+	"os"
+)
+
+func init() {
+	log.SetFlags(0)
+}
+
+func main() {
+	err := app.Run(os.Args)
+	if err != nil {
+		log.Fatalln(err)
+	}
+}
diff --git a/make.bat b/make.bat
new file mode 100644
index 0000000..5b21e00
--- /dev/null
+++ b/make.bat
@@ -0,0 +1 @@
+go build -trimpath -buildmode=pie -ldflags "-linkmode external -s -w"
\ No newline at end of file
diff --git a/options.go b/options.go
new file mode 100644
index 0000000..27dd2e9
--- /dev/null
+++ b/options.go
@@ -0,0 +1,63 @@
+package main
+
+import "time"
+
+// Options carries the settings of one load run, filled from the command-line flags.
+type Options struct {
+	// Source file path
+	filePath string
+
+	// Source file type
+	fileType string
+
+	// Server address
+	server string
+
+	// Database name
+	database string
+
+	// Table name
+	tableName string
+
+	// comma delimiter for CSV files
+	comma rune
+
+	// Number of rows to skip before reading of header
+	skipRows int
+
+	// List of field types, one character per input column
+	fieldsTypes string
+
+	// Date format
+	dateFormat string
+
+	// Date+time format
+	timestampFormat string
+
+	// Sheet name for Excel file
+	sheetName string
+
+	// CSV/DBF codepage
+	encoding string
+
+	// create table before inserting data
+	create bool
+
+	// Drop existing table before creating
+	overwrite bool
+
+	// Disable progress output
+	silent bool
+
+	// Input file dates timezone
+	timezone *time.Location
+
+	// Decompress before process
+	decompress string
+
+	// Unknown column names
+	unknownColumnNames bool
+
+	// Column names list
+	columnNames []string
+}
diff --git a/reader.go b/reader.go
new file mode 100644
index 0000000..d4fcbaa
--- /dev/null
+++ b/reader.go
@@ -0,0 +1,44 @@
+package main
+
+import "fmt"
+
+// Reader is the common interface of the input file readers.
+type Reader interface {
+	// GetHeader returns the list of column names.
+	GetHeader() []string
+
+	// GetRow returns the next row of the file, or io.EOF when there are no
+	// more rows. asString asks the reader to return the values as strings
+	// rather than converted to the configured field types.
+	GetRow(asString bool) ([]any, error)
+
+	// Options returns the options the reader was created with.
+	Options() *Options
+
+	// Close closes the reader.
+	Close() error
+}
+
+// getHeader reads the next row of r as strings and returns it as the list of
+// column names. An empty header cell comes back from GetRow as nil and is
+// kept as an empty name instead of panicking in a type assertion.
+func getHeader(r Reader) ([]string, error) {
+	headerAny, err := r.GetRow(true)
+	if err != nil {
+		return nil, err
+	}
+
+	header := make([]string, 0, len(headerAny))
+	for i, v := range headerAny {
+		switch name := v.(type) {
+		case nil:
+			header = append(header, "")
+		case string:
+			header = append(header, name)
+		default:
+			return nil, fmt.Errorf("header column %d: unexpected value type %T", i+1, v)
+		}
+	}
+
+	return header, nil
+}
diff --git a/readercsv.go b/readercsv.go
new file mode 100644
index 0000000..07e7645
--- /dev/null
+++ b/readercsv.go
@@ -0,0 +1,118 @@
+package main
+
+import (
+	"bufio"
+	"encoding/csv"
+	"errors"
+	"fmt"
+	"io"
+)
+
+// CsvReader reads rows from a CSV stream according to Options.
+type CsvReader struct {
+	reader *csv.Reader
+	header []string
+	options *Options
+}
+
+// NewCsvReader is the exported form of newCsvReader.
+func NewCsvReader(r io.Reader, options *Options) (*CsvReader, error) {
+	return newCsvReader(r, options)
+}
+
+// newCsvReader decodes r from options.encoding, skips options.skipRows
+// physical lines and reads the header row. Every record must have exactly
+// one field per character of options.fieldsTypes.
+func newCsvReader(r io.Reader, options *Options) (*CsvReader, error) {
+	if options.fieldsTypes == "" {
+		// FieldsPerRecord == 0 would disable the length check that GetRow
+		// relies on when it indexes fieldsTypes.
+		return nil, errors.New("fields types must not be empty")
+	}
+
+	decoder, err := charsets.DecodeReader(options.encoding, r)
+	if err != nil {
+		return nil, fmt.Errorf("enable decoder: %w", err)
+	}
+
+	bufReader := bufio.NewReaderSize(decoder, 4*1024*1024)
+
+	for i := 0; i < options.skipRows; i++ {
+		_, _, err := bufReader.ReadLine()
+		if err != nil {
+			return nil, fmt.Errorf("skip rows: %w", err)
+		}
+	}
+
+	re := csv.NewReader(bufReader)
+	re.Comma = options.comma
+	re.FieldsPerRecord = len(options.fieldsTypes)
+
+	csvReader := &CsvReader{
+		reader: re,
+		options: options}
+
+	header, err := getHeader(csvReader)
+	if err != nil {
+		return nil, err
+	}
+
+	csvReader.header = header
+
+	return csvReader, nil
+}
+
+// GetHeader returns the column names read from the header row.
+func (r *CsvReader) GetHeader() []string {
+	return r.header
+}
+
+// Options returns the options the reader was created with.
+func (r *CsvReader) Options() *Options {
+	return r.options
+}
+
+// GetRow returns the values of the next record, leaving out Skip columns.
+// With asStrings set every column is parsed as String (empty cells are still
+// nil). io.EOF is returned unwrapped when the input is exhausted.
+func (r *CsvReader) GetRow(asStrings bool) ([]any, error) {
+	record, err := r.reader.Read()
+	if err == io.EOF {
+		return nil, err
+	}
+	if err != nil {
+		return nil, fmt.Errorf("read record: %w", err)
+	}
+
+	var args []any
+
+	for i, v := range record {
+		var fieldType FieldType
+		err = fieldType.UnmarshalText([]byte{r.options.fieldsTypes[i]})
+		if err != nil {
+			return nil, fmt.Errorf("get record type: %w", err)
+		}
+
+		if fieldType == Skip {
+			continue
+		}
+
+		if asStrings {
+			fieldType = String
+		}
+
+		parsedValue, err := fieldType.ParseValue(r, v)
+		if err != nil {
+			return nil, fmt.Errorf("parse value: %w", err)
+		}
+
+		args = append(args, parsedValue)
+	}
+
+	return args, nil
+}
+
+// Close does nothing and always returns nil.
+func (r *CsvReader) Close() error {
+	return nil
+}
diff --git a/readercsv_test.go b/readercsv_test.go
new file mode 100644
index 0000000..f2f6607
--- /dev/null
+++ b/readercsv_test.go
@@ -0,0 +1,42 @@
+package main
+
+import (
+	"io"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestCsvReaderBasic(t *testing.T) {
+	f, err := os.Open("testdata/csv/9729337841_20032023_084313667.csv")
+	assert.NoError(t, err)
+
+	options := &Options{
+		encoding: "win1251",
+		comma: rune(";"[0]),
+		skipRows: 3,
+		fieldsTypes: "s ttffsssss",
+		dateFormat: "02.01.2006",
+		timestampFormat: "02.01.2006 15:04:05",
+		timezone: 
time.Local}
+
+	csvReader, err := NewCsvReader(f, options)
+	assert.NoError(t, err)
+
+	assert.Equal(t, []string{"RRN", "Дата операции", "Дата ПП", "Сумма операции", "Сумма расчета", "Номер карты", "Код авторизации", "Тип операции", "Доп. информация_1", "Доп. информация_2"}, csvReader.GetHeader())
+
+	row, err := csvReader.GetRow(false)
+	assert.NoError(t, err)
+
+	t1 := time.Date(2023, 03, 19, 17, 49, 35, 0, time.Local)
+	t2 := time.Date(2023, 03, 20, 0, 0, 0, 0, time.Local)
+	assert.Equal(t, []any{"307814009186", t1, t2, 499.00, 488.52, "522598******7141", "REZE64", "Покупка", "35068281112", "307817403283"}, row)
+
+	row, err = csvReader.GetRow(false)
+	assert.Equal(t, err, io.EOF)
+
+	err = csvReader.Close()
+	assert.NoError(t, err)
+}
diff --git a/readerdbf.go b/readerdbf.go
new file mode 100644
index 0000000..ea6fe30
--- /dev/null
+++ b/readerdbf.go
@@ -0,0 +1,150 @@
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"time"
+
+	dbf "github.com/SebastiaanKlippert/go-foxpro-dbf"
+)
+
+func init() {
+	// Install a version check that accepts every DBF file version byte.
+	dbf.SetValidFileVersionFunc(func(version byte) error {
+		return nil
+	})
+}
+
+// DbfReader reads rows from a DBF file according to Options.
+type DbfReader struct {
+	reader *dbf.DBF
+	header []string
+	options *Options
+}
+
+// NewDbfReader is the exported form of newDbfReader.
+func NewDbfReader(r io.Reader, options *Options) (*DbfReader, error) {
+	return newDbfReader(r, options)
+}
+
+// newDbfReader loads the whole stream into memory (presumably because
+// OpenStream needs a seekable reader), opens it and builds the header from
+// the non-skipped columns, decoding the field names from options.encoding.
+func newDbfReader(r io.Reader, options *Options) (*DbfReader, error) {
+	b, err := io.ReadAll(r)
+	if err != nil {
+		return nil, err
+	}
+
+	br := bytes.NewReader(b)
+
+	re, err := dbf.OpenStream(br, nil, &dbf.UTF8Decoder{})
+	if err != nil {
+		return nil, err
+	}
+
+	dbfReader := &DbfReader{
+		reader: re,
+		options: options}
+
+	fullHeader := re.FieldNames()
+	if len(options.fieldsTypes) > len(fullHeader) {
+		// Indexing fullHeader below would panic otherwise.
+		return nil, fmt.Errorf("fields types describe %d columns, file has %d", len(options.fieldsTypes), len(fullHeader))
+	}
+
+	var header []string
+	for i, v := range options.fieldsTypes {
+		if v == ' ' {
+			continue
+		}
+
+		s, err := charsets.DecodeString(options.encoding, fullHeader[i])
+		if err != nil {
+			return nil, err
+		}
+
+		header = append(header, s)
+	}
+
+	dbfReader.header = header
+
+	return dbfReader, nil
+}
+
+// GetHeader returns the decoded names of the non-skipped columns.
+func (r *DbfReader) GetHeader() []string {
+	return r.header
+}
+
+// Options returns the options the reader was created with.
+func (r *DbfReader) Options() *Options {
+	return r.options
+}
+
+// GetRow returns the values of the current record and advances to the next
+// one, or io.EOF when all records have been read. The asStrings argument is
+// not used by this reader.
+func (r *DbfReader) GetRow(asStrings bool) ([]any, error) {
+	if r.reader.EOF() {
+		return nil, io.EOF
+	}
+
+	record, err := r.reader.Record()
+	if err != nil {
+		return nil, fmt.Errorf("read record: %w", err)
+	}
+
+	r.reader.Skip(1)
+
+	fields := record.FieldSlice()
+	if len(fields) > len(r.options.fieldsTypes) {
+		// Indexing fieldsTypes below would panic otherwise.
+		return nil, fmt.Errorf("record has %d fields, fields types describe %d", len(fields), len(r.options.fieldsTypes))
+	}
+
+	var args []any
+
+	for i, v := range fields {
+		var fieldType FieldType
+		err = fieldType.UnmarshalText([]byte{r.options.fieldsTypes[i]})
+		if err != nil {
+			return nil, fmt.Errorf("get record type: %w", err)
+		}
+
+		if fieldType == Skip {
+			continue
+		}
+
+		decV, err := charsets.DecodeString(r.options.encoding, fmt.Sprint(v))
+		if err != nil {
+			return nil, err
+		}
+
+		parsedValue, err := fieldType.ParseValue(r, decV)
+		if err != nil {
+			return nil, fmt.Errorf("parse value: %w", err)
+		}
+
+		args = append(args, parsedValue)
+	}
+
+	return args, nil
+}
+
+// Close does nothing and always returns nil.
+func (r *DbfReader) Close() error {
+	return nil
+}
+
+// ParseDate parses rawValue with the fixed layout "02.01.2006" in the configured timezone.
+func (r *DbfReader) ParseDate(rawValue string) (time.Time, error) {
+	return time.ParseInLocation("02.01.2006", rawValue, r.options.timezone)
+}
+
+// ParseDateTime parses rawValue with the fixed layout "02.01.2006 15:04:05"
+// in the configured timezone.
+func (r *DbfReader) ParseDateTime(rawValue string) (time.Time, error) {
+	return time.ParseInLocation("02.01.2006 15:04:05", rawValue, r.options.timezone)
+}
diff --git a/readerdbf_test.go b/readerdbf_test.go
new file mode 100644
index 0000000..18753c8
--- /dev/null
+++ b/readerdbf_test.go
@@ -0,0 +1,38 @@
+package main
+
+import (
+	"io"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestDbfReaderBasic(t *testing.T) {
+	f, err := os.Open("testdata/dbf/38_052QB.dbf")
+	assert.NoError(t, err)
+
+	options := &Options{
+		fieldsTypes: "sssssstdffsss",
+		timezone: time.Local,
+		encoding: "cp866"}
+
+	dbfReader, err := NewDbfReader(f, options)
+	assert.NoError(t, err)
+
+	assert.Equal(t, []string{"TRAN_ID", "БАНК", "ОТДЕЛЕНИЕ", "ТОЧКА", "НАЗВАНИЕ", "ТЕРМИНАЛ", "ДАТА_ТРАН", "ДАТА_РАСЧ", "СУММА_ТРАН", "СУММА_РАСЧ", "КАРТА", "КОД_АВТ", "ТИП"}, 
dbfReader.GetHeader()) + + row, err := dbfReader.GetRow(false) + assert.NoError(t, err) + + t1 := time.Date(2023, 02, 20, 5, 57, 12, 0, time.Local) + t2 := time.Date(2023, 02, 21, 0, 0, 0, 0, time.Local) + assert.Equal(t, []any{"719089383780", "44", "8644", "570000009312", "STOLOVAYA TSPP", "844417", t1, t2, 1757.08, 1713.15, "536829XXXXXX9388", "UM1TS8", "D"}, row) + + row, err = dbfReader.GetRow(false) + assert.Equal(t, err, io.EOF) + + err = dbfReader.Close() + assert.NoError(t, err) +} diff --git a/readerxlsx.go b/readerxlsx.go new file mode 100644 index 0000000..4b094ba --- /dev/null +++ b/readerxlsx.go @@ -0,0 +1,159 @@ +package main + +import ( + "errors" + "fmt" + "io" + "strconv" + "time" + + "github.com/xuri/excelize/v2" +) + +type XlsxReader struct { + streamReader *excelize.File + rows *excelize.Rows + header []string + options *Options +} + +func NewXlsxReader(r io.Reader, options *Options) (*XlsxReader, error) { + return newXlsxReader(r, options) +} + +func newXlsxReader(r io.Reader, options *Options) (*XlsxReader, error) { + streamReader, err := excelize.OpenReader(r) + if err != nil { + return nil, fmt.Errorf("open reader: %w", err) + } + + sheetName := options.sheetName + if sheetName == "" { + if len(streamReader.GetSheetList()) == 0 { + streamReader.Close() + return nil, fmt.Errorf("get sheet list: %w", errors.New("file does not contains any sheets")) + } + sheetName = streamReader.GetSheetList()[0] + } + + rows, err := streamReader.Rows(sheetName) + if err != nil { + streamReader.Close() + return nil, fmt.Errorf("read rows: %w", err) + } + + xlsxReader := &XlsxReader{ + streamReader: streamReader, + options: options, + rows: rows} + + for i := 0; i < options.skipRows; i++ { + _, err := xlsxReader.GetRow(true) + if err != nil { + streamReader.Close() + return nil, fmt.Errorf("skip rows: %w", err) + } + } + + header, err := getHeader(xlsxReader) + if err != nil { + streamReader.Close() + return nil, fmt.Errorf("read header: %w", err) + } + 
xlsxReader.header = header + + return xlsxReader, nil + +} + +func (r *XlsxReader) GetHeader() []string { + return r.header +} + +func (r *XlsxReader) Options() *Options { + return r.options +} + +func (r *XlsxReader) GetRow(asStrings bool) ([]any, error) { + end := !r.rows.Next() + if end { + return nil, io.EOF + } + + record, err := r.rows.Columns(excelize.Options{RawCellValue: true}) + if err != nil { + return nil, err + } + + var args []any + + for i, v := range record { + var fieldType FieldType + err = fieldType.UnmarshalText([]byte{r.options.fieldsTypes[i]}) + if err != nil { + return nil, fmt.Errorf("get record type: %v", err) + } + + if fieldType == Skip { + continue + } + if asStrings { + fieldType = String + } + + parsedValue, err := fieldType.ParseValue(r, v) + if err != nil { + return nil, fmt.Errorf("parse value: %v", err) + } + + args = append(args, parsedValue) + } + + return args, nil +} + +func (r *XlsxReader) Close() error { + err := r.rows.Close() + if err != nil { + return err + } + + err = r.streamReader.Close() + if err != nil { + return err + } + + return nil +} + +func (r *XlsxReader) ParseDate(rawValue string) (time.Time, error) { + f, err := strconv.ParseFloat(rawValue, 64) + if err != nil { + return time.Time{}, err + } + + t, err := excelize.ExcelDateToTime(f, false) + if err != nil { + return time.Time{}, err + } + + t = time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), r.options.timezone) + + return t, nil +} + +func (r *XlsxReader) ParseDateTime(rawValue string) (time.Time, error) { + f, err := strconv.ParseFloat(rawValue, 64) + if err != nil { + return time.Time{}, err + } + + t, err := excelize.ExcelDateToTime(f, false) + if err != nil { + return time.Time{}, err + } + + t = time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), r.options.timezone) + + return t, nil +} diff --git a/readerxlsx_test.go b/readerxlsx_test.go new file mode 100644 index 
0000000..3bb56c6 --- /dev/null +++ b/readerxlsx_test.go @@ -0,0 +1,36 @@ +package main + +import ( + "io" + "os" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestXlsxReaderBasic(t *testing.T) { + f, err := os.Open("testdata/xlsx/38_049RMZ_all.xlsx") + assert.NoError(t, err) + + options := &Options{skipRows: 0, fieldsTypes: "s sssssssssttfffssss", timezone: time.Local} + + xlsxReader, err := NewXlsxReader(f, options) + assert.NoError(t, err) + + assert.Equal(t, []string{"ИНН предприятия", "Город", "Адрес ТСТ", "Обслуживающее отделение", "Расчетное отделение", "RRN операции", "Название ТСТ", "Мерчант ТСТ", "Расчетный мерчант", "Терминал", "Дата проведения операции", "Дата обработки операции", "Сумма операции", "Комиссия за операцию", "Сумма к расчету", "Карта", "Код авторизации", "Тип операции", "Тип карты"}, xlsxReader.GetHeader()) + + row, err := xlsxReader.GetRow(false) + assert.NoError(t, err) + + t1 := time.Date(2023, 02, 17, 1, 5, 12, 0, time.Local) + t2 := time.Date(2023, 02, 18, 6, 24, 24, 0, time.Local) // TODO: в excel-файле 37 секунд? + + assert.Equal(t, []any{"7710146208", nil, nil, "99386901", "99386901", "304722813269", "TSENTRALNYY TELEGRAF", "780000334079", "780000334079", "10432641", t1, t2, 50.00, 0.80, 49.20, "553691******1214", "026094", "D", "MC OTHER"}, row) + + row, err = xlsxReader.GetRow(false) + assert.Equal(t, io.EOF, err) + + err = xlsxReader.Close() + assert.NoError(t, err) +} diff --git a/sql.go b/sql.go new file mode 100644 index 0000000..614c9a5 --- /dev/null +++ b/sql.go @@ -0,0 +1,151 @@ +package main + +import ( + "database/sql" + "fmt" + "io" + "os" + "strings" + + mssql "github.com/denisenkom/go-mssqldb" +) + +// TODO: add escaping +func prepareTable(reader Reader, tx *sql.Tx) error { + if reader.Options().unknownColumnNames { + var columnNames []string + + sql := fmt.Sprintf("SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA + '.' 
+ TABLE_NAME = '%s' ORDER BY ORDINAL_POSITION", reader.Options().tableName) + rows, err := tx.Query(sql) + if err != nil { + return fmt.Errorf("get column names from database: %w", err) + } + defer rows.Close() + + for rows.Next() { + if rows.Err() != nil { + return fmt.Errorf("get column names from database: %w", err) + } + var columnName string + err = rows.Scan(&columnName) + if err != nil { + return fmt.Errorf("get column names from database: %w", err) + } + columnNames = append(columnNames, columnName) + } + + reader.Options().columnNames = columnNames + } else { + reader.Options().columnNames = reader.GetHeader() + } + + if !reader.Options().create && !reader.Options().overwrite { + return nil + } + + if !reader.Options().create && reader.Options().overwrite { + logger.Println("Truncating table...") + _, err := tx.Exec(fmt.Sprintf("TRUNCATE TABLE %s", reader.Options().tableName)) + if err != nil { + return err + } + } + + if reader.Options().overwrite { + logger.Println("Dropping table...") + _, err := tx.Exec(fmt.Sprintf("IF object_id('%s', 'U') IS NOT NULL DROP TABLE %s", reader.Options().tableName, reader.Options().tableName)) + if err != nil { + return fmt.Errorf("drop table: %w", err) + } + } + + sql := fmt.Sprintf("CREATE TABLE %s (", reader.Options().tableName) + + fieldTypes := strings.ReplaceAll(reader.Options().fieldsTypes, " ", "") + + for i, columnName := range reader.Options().columnNames { + var fieldType FieldType + err := fieldType.UnmarshalText([]byte{fieldTypes[i]}) + if err != nil { + return fmt.Errorf("detect field type: %w", err) + } + + sql += fmt.Sprintf(`"%s" %s`, columnName, fieldType.SqlFieldType()) + + if i+1 < len(reader.GetHeader()) { + sql += ", " + } else { + sql += ") WITH (DATA_COMPRESSION = PAGE)" // TODO: add optional params + } + } + + logger.Println("Creating table...") + logger.Println(sql) + _, err := tx.Exec(sql) + if err != nil { + return fmt.Errorf("execute table creation: %w", err) + } + + return nil +} + +func 
insertData(reader Reader, tx *sql.Tx) error { + columnNames := reader.GetHeader() + if reader.Options().unknownColumnNames { + columnNames = reader.Options().columnNames + } + + sql := mssql.CopyIn(reader.Options().tableName, mssql.BulkOptions{Tablock: true}, columnNames...) + + stmt, err := tx.Prepare(sql) + if err != nil { + _ = tx.Rollback() + return fmt.Errorf("prepare statement: %w", err) + } + + n := 0 + for { + if n%100000 == 0 { + if !reader.Options().silent { + fmt.Fprintf(os.Stderr, "Processed %d records...\r", n) + } + } + + record, err := reader.GetRow(false) + if err == io.EOF { + break + } + if err != nil { + return fmt.Errorf("read record: %w", err) + } + + _, err = stmt.Exec(record...) + if err != nil { + _ = stmt.Close() + _ = tx.Rollback() + return fmt.Errorf("execute statement: %w", err) + } + n++ + } + result, err := stmt.Exec() + if err != nil { + _ = tx.Rollback() + return fmt.Errorf("execute statement: %w", err) + } + rowsAffected, err := result.RowsAffected() + if err != nil { + _ = tx.Rollback() + return fmt.Errorf("calc rows affected: %w", err) + } + if !reader.Options().silent { + fmt.Fprintf(os.Stderr, "Processed %d records. \n", rowsAffected) + } + + err = stmt.Close() + if err != nil { + _ = tx.Rollback() + return fmt.Errorf("close statement: %w", err) + } + + return nil +} diff --git a/testdata/csv/9729337841_20032023_084313667.csv b/testdata/csv/9729337841_20032023_084313667.csv new file mode 100644 index 0000000..634ac4b --- /dev/null +++ b/testdata/csv/9729337841_20032023_084313667.csv @@ -0,0 +1,5 @@ +skip +skip +skip +RRN; ;; ; ; ; ; ; ; ; ; ; ;. _1;. 
_2 +307814009186; ; ;781000815902;WINK;28403560;19.03.2023 17:49:35;20.03.2023 00:00:00;499,00;488,52;522598******7141;REZE64;;35068281112;307817403283 diff --git a/testdata/dbf/38_052QB.dbf b/testdata/dbf/38_052QB.dbf new file mode 100644 index 0000000000000000000000000000000000000000..fd127ab7aeaa9cbb69bd2a00c6cda7c05b8de188 GIT binary patch literal 599 zcmZvXK~BRk5Jl615JEr~2o|h-g2;C4+Uc&M8zg8e5)iC%5H_YtM3g2WH9|cPHzOXK zsxth=$euUtCf0(|L9~GvLOQQ&aZzrkcuKfe-hXIkq}4)atPmz776kkLOCOOn0hr{Ng{}sm<2$6OV5` z>)Hj(4LbKeXgLG`A9=h|omMsz-skVH+B{P$Z6<&p+1Xvy<;U+CxECLsAwgm+<48tA zV#ef{F)lDDjtE5vCF$aB@icvzqBL19`vyod#)v?O;t%8~k`r(nQYC^$$J$RBiwO literal 0 HcmV?d00001 diff --git a/testdata/xlsx/38_049RMZ_all.xlsx b/testdata/xlsx/38_049RMZ_all.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bae2e0f3567080ce06afdc37d486bfd3490667c8 GIT binary patch literal 9124 zcmeHN1zQ~1(w^Xwz%aPGdjdfdAi*s-!GjF$?lQn&fnXuHLvRc3?gR^N!3j24aKbm) zyZ7#HcE4Y6@0sW6)ALMM^>m-Ns@|$PN^*!ucmNat8UO&G1{fWtnd?6Q0HToq09*hX zg3e1jTW7GXvw@n2J=jT)!`;T3DjNxbDHDJIzy5#YzxWN5#1AQUaRDVRq;Fqtu*)u1 ziK6iyK>9!|%Fo-o<9dsXbE?*XqPn|77ZoF~TwzG`aGKLoQ!5{Z0uM=5Vfa~*c5k#5Yn!*Ow`UtuRK%Im zb>mZfa|3prbkP86$iyr_$tt3np!Birs86>DevbUmQ76^X4gD%ES@#O5Nn`rEXcI*u z8XLB+d41{PuLSs5Aif{_2kQCTUmX~edE4fXqi!HcT;AiI<9us8ZGQnAXs4$02|4)M z|4qnie{54H@bEa{o8RLfC;-6yJt9EqFJ@Vz&P96$$C^Cc>M-GEY2XO9cH-ptdHkO_ z{ukTepB}v=R^e3_*Fz{w`ZlEhVrnTG_(IN2OuC6$&DURM5xXWbhmLfqjgbtfMiPi9 z<=5hSJGihU9J$+1eZIz15&^;!qOEl+g?_kpa6x5Za7>VNC|T{sah>`;b)F<8>%rvO z@{Xmrz936(V1-t4@>skQILe_(f{vR<1|@iw6rkO!ptWptS@vM+h3Y{mw4#nLb2n}@ z#cw7d|Jx&xPyzYhD)1>Z;}f78OS6j)F8E1}4_+r{XDXxOd*{ zimCl7G(4co$D>kx^qJrNw5xfJhtfQ{`Ov$H`;P`75tIeX@CfyHBMEpO|KSw=MjGKp zLIOa0;BL+NFFSFwbF?zDv$Ogcz5Z=858xpTuI0abl&C1kb#nn*Fn@$_xuv+`w}TU@Zs-A$j6j!-SCS zMovc%(TomCDIikFxS;*rz2XB*tYjzLhUc;UZ0UIXTbE4q#34EPG~#Cs6Hp$@&s;8# zEuF^52jQ3*s^0Lw4pQAOg;dwoQ6)2*Zr(S%TsTL`fOt@jpAw_X2tV=wjPHD*X1(l? 
zI7d5azBN_w_-xgYbNz6WtXPhsi1(Q{)R`)zUy9O?Io4@2JE=(>w(9)iW{|mU;e>7I z3}(79&^NULU+KR?23j6MM+Aq<132BmhF<}P%%8!M8#m%O$p!3#4Ig2VERil)fMgk5 zQMHL24F#*K2@Omz*i`~F{ z&G#8U9*--dDUzmvw{t)wbHd5N8&6`S zD9y~oltdDqGQ3`vE$(XYS4&JbH++B1$BEEY8YOviK~TcEz(Z9$NLRyZB*KAIDDr^t z^A+`MSFhx?(GF%)J_Vod$ReB=bmm)Ef0gV@M?FhpEe7rbj%3&XzpyB4N2x>p5w1A$ zIxY4|WG7X4Z+x6OHJ$FeDqM{j%A{Spn0V=}+a_K2e&fdWvFf9~Ae-H~$mn{K5!1aHikr7%%5_gL7yOC#acu(z3tEC?K72dabKHC2@m?^Y@yZE#d zfk1*&r>c{4T`r*kbBlH2HSy#rxbdd8$*+E43Dyep74}x(RBlPol9r1T9GXv zB0^LE0PkNQaxyn`1e>TjJ6hP9IsJ4ysp@)m6G}v_e8(cGXA|Au&jMe3m(`D6OK6hAh}O z=Vo>zP1p~Xle&8T2%$+tVNHRWb!zU2hP21?t&(_lxMlcl*t9oMd*)ML2cNEC>55WH zz7~L@>#kb9-PQt(fB4lE>X(2; zT)c&@j4n}vLG%E>Hy#mfeLoIF-SLz-KJ-j;Nhsm2!lG;1rr)K2DW@auxA??-t0U^{ zQjRENZjpR(R(6m%#FroCMk>t`31PpyF;H?PWu{D$sea(2 zp9ZU(^d0pk}Vh zmvVW0zxFN?RqkooU??M#et7moJq7z0`gWt+salY_axsZb<7@eG=Wk;6GF7=yMxMeF zWxYtcBW3=0>3TDZstmu_0f|xPagA=mlq=GI@^yIq!htPtcG*VqGqwB8*EySmZNQwr z9l3v^bYE-9mQ(_-Rceh~!wYt8=Vl}2!s2DaH4(vY;99NV$ zCw_%8q)6W9(mXxUhNRRQzBk>dKcjOU2jC&=>{sO|dR4b%?zZ@_*ndJQ5T;nF%5LS} z8?Ix83mVSBHp+A zGb{H4&))K5_e^qPr9TsPzusi@^S*EY+Jc`GfB}mGGERe=&Gd|^AC9$CVy}Jpw1XOI zPQV0ML7CB`FmE4n5?82yI{ng}7G;Y20q1913*EXv_ujCqZ8@=m2X#a_HUy-I_A+OA z;`JgC3|pVqlfs*zV`YVQloHyIrcccMoZJm(h$Z9AvdZ1#NK%P9YeKbwm4?oI=;LdR z5>Rfr*+d<_uqnxIqS)ue=E3o#HqpS7$9yd-7z!LWOu3frd^p(hbamUuN_iOSsNPQD)j(Zpdeg_V`Bci-ytcv8*#(5=^cDkBLwGvvDVV0^hJNeZ=f= z0cyTi(WhEV(X#H}lo&p{a8T?+lY1Y~AR2cTE&g%$h8yg5PY&j@IwLl3oIBnt@L*-= zb+cSggk}WYV6(WPj${`T=_gDj?WNhb;o9E#aV|2p44HAxG`t9O-?nmVh{ZN=rxsy| zw>=f;NeR;(V&r-i={d&4NAAejxVWQ$=ssMFLt+b}wJ`2^>nHABL7vIXb~)sjA}I5C zCaycV3hDeomDXX_G53liA2(**EYs3x!jKUxIa~(fzO3)~;h^H;EAxEurdM^ISHoUf zf6I4>e}xY*NY+d+tSCU-W*klHiV@^!V%C{$_(qxVkg!nO#DN$ivvwGREtl=JFR#sv z%qaSsh+rm)FyTBYl!*QCjZ};LR~QWHtN;!C2#o$GOF-RMWa&hD+LoNr7Lh?Jwu0z+ z4-t7fwOoo&SK#Ww&%Z#a+sctF4?6Y{uV>55e zkLurNk5LmBip-Ub%Wp?|!tb+cAfc7-&?&@(KH)DnE;B_%N7hF`Z#YTNWuSo=nG2P+ z-NZoa)R8NDP<2up6PUXv)4R@1;mCU+8xiV!jf3}m3S+%);&!T|Gj@b(X`#b>lw?fH 
z`;k~!LroRh*HAipGBw2<0UG<|2E3}~A5jYON&7{pQB}|hSMiSz22JtQhgIwG#_7qg zEQk>|RiB!o?O39r;=+l>d5PDx8avI$P}ApTmPItC3ZD{N8)Wni-@nQxDOPLTd`dG= zQ&u@9$fUN;<}g@tgMbTp!48wpVIV3WtQgxbQUB-)#YCV7Eu%Z=e8%aOZr&ZncI_+H zkOxM%~SL@32xaMwwoju&kQWNLK8~jV7ti`D<=G9i0w6G8r+NsD{Na zCai1fwkZ`gEkRUWWP=+QiU_pJblBaN91ADQ7K6irQ4Qx5U>2VCToT)JHj&l_4VCDG zW_H_9HnzteII8j8n~@3)*-NvzguEICi+!^M55^nT)jcCEa}MxHrZmdGI?sQ9(RDE& z9JM_o*b)VsCT(e|tzqp@EZ!B;uhTyoY&EcNj?FZ%!?RS(n{c!&^_;O-8cE>hI#GLG z^;*=OzWjUJKPH2i@Fc%iTs7Uj^jCcWM&!-pua_y4y84ml`MH@oJvY)swm}_u~i2EKosASZ zI?}Tk%|aYZr`ZW4X)@9|^CnC>0xZ`<=%gY=COXcc)8D^>Ivh(a)stxfF6vrUsn0hA z9T+(VE~EiB!gL~zEtzt{k#h73k&ZR7C>yBGa-N>FQ&LHDucqfD@*M8amq1t)luZ_? ztYth#Rhf_2LBh4qWsnYx6w%EvM@l}?L5^Lvu}X4El_(jBcED+6;0aD%GNV+OJ8}X7 z{{#^opW4vFrv-K8)A*t6Swez|PY+{u@))c{LQzl`&pUbane;^Y4)Nz2ClV(o0Lmi? zGp@Wvkd@%aDB%gc$h30w=yiq8R6>Xo1I*hWYK=wWQD+sxHe^jYQv-v>ZZv{Q(ZkM6 zLuPE)egI3mDB7V^(<@!s`HQQRspnlEE9S|ia*Hmn4`kU2KF@zN6GD@w^oPdX12D?> z9j+)C=Zi2svBF)Is?sj|UA-WDD^FKou){W6-{ln>-*(L1VDSk2b?CrK{G*Bp(-pMz zslM5)@!I~wr#m5#1e2bTsY7GP#W|^V#e4tin*z6ejU)n^4zHWe^J0-Ix3ON`FGu}v zBv?m4w^!EZmXAe-5X|;RdKKN}Y0W?*TZ(w;pSMn}*N~ULedsv54w;)$)ZRf9Nu4HR zNIi@@7%({WL8q`5ygraao_~25_hzFd%6oJm>e&jql*#^EslJ#v312SC;7^~JB2X4{ z%oOEDzuz5HTi=>nCT6A%*p$}dlXzxHSKPe>9oB*lf7r3zA^#Kgt8IU25{L)@cs~RH zi2h{^Khs<%b1>N1iSxJVH~ts@VZkwl3kZb?O}f3Q$EF${Hx`PUc34k+Wm67jTaX8E zOGm}k!NLQ{gL?BDLd7p#>F;jjm~u|v@r*CabQmQ^x|qCfl&lKnShs#Q?br7CGlHHm znVDSb*xR??Uv-|H4qVvfkh3q(LV;ys(6^D_A8lWa@n}_3Gn=A}$e`G1R7-P*^cmSy zHB-53q2z^G=p>ls^M5{L>8jX?3oO%c5YOkC&!>aOT?k!b*Xg{X!Tn11!z4`U z6Rr^JrXO21z1FHtXU4a~@b0}wA#@yUQYDu4x2Yh4fYj*T3cVQSZ1jiNZ8bautpI@| zDPH^L4N@GN`c__1f2;8XhvXln247ayGxbnkG!-vqp{sh>=X&?`3(K7#Xx>wP<2xy?tEK=$wM*{7_i@G&xTvHs z2PPy^?>$g(L`kDfCfnyK_;~m6MLY^GmS~6yijPKU@{4fAXXdj`tqSzPJ$p)R2J|?1 zZL#0JLRxd4a)_dm&QhJnECnQ=L@2IL>)&ATq5#wK=`xQhOK15-<%_T!_H(Kdr?{2U zSyYFQP1MHHP_}t4{Wo8yUYj#b6prHXiz0bb?^I$*`XfR+1}>ggN^S{}X(}t+?tlsu zZudd1DT%IOVk!0eAjOnK7xX%X+g*@;O8qX#Hzm<6EK@<(Ev!OekqIwOyhWWVw#W@b 
zJodYOfOO+?CZNo$6fzC-O|B@NMFxIo?m+}XOt+6Z6{?GZ{pIr3o4NanhRlVhb>IwE z;M@*|xa+mWhDw06dP(Cc~j}^ z8X<>u4R6F1QSlC|3~C?RCzBEE7-i7g?TsM;5!{-xsxJ*lg|WIkF$Sm6h>%TToInP_ z!mZ#x}oM?KS_IHw_m@-7s znZ6n*dQAod$&ac$7X8AqNWXZsO6|IUtV@A}{`G?`KX7~gxx6YNQg0Xzhwl$Ke1U(4 zuc5vD|IiDE?e8NsR^ewWHTXh$k@_TE5IIO`lZe{48$mp;c0clo1)Hu=RY(CCRVS)l zf*+*?Cr&&b?{#HL#>ef^Q3A%HU(yC%E~B4EKXKQLFltWc7+Npv#6lk94kd53A=e=&I$ z+6C&^jE1R`zMK|_vb-&~ICSYEgyk29`{(hOvRGBWXv}No7QEcSl;=nwDfxPrN7abF z)Az=nUbW+Vej+u;hF{*%`ID*7w! z{X=w`@IR0KHxm4-mcPc}KXd>9@8EUTzeVI<#sBI9|17RZ@h9=$J))8vGJJIb01WuY MAMQ1=sDFO@KS=}K_5c6? literal 0 HcmV?d00001 diff --git a/zipreader.go b/zipreader.go new file mode 100644 index 0000000..a303eae --- /dev/null +++ b/zipreader.go @@ -0,0 +1,33 @@ +package main + +import ( + "archive/zip" + "io" +) + +type ProcessFunc func(io.Reader, *Options) error + +type ZipReader struct{} + +func (zr *ZipReader) Process(options *Options) error { + z, err := zip.OpenReader(options.filePath) + if err != nil { + return err + } + defer z.Close() + + for _, zFile := range z.File { + f, err := zFile.Open() + if err != nil { + return err + } + defer f.Close() + + err = process(f, options) + if err != nil { + return err + } + } + + return nil +}