commit 917b473fa93a225d62e0d6dc18b044d088908e61 Author: nxshock Date: Mon Apr 3 19:12:30 2023 +0500 Initial commit diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3b735ec --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a626811 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 nxshock + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..5b57224 --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# mssqlbulkloader + diff --git a/app.go b/app.go new file mode 100644 index 0000000..e78bb83 --- /dev/null +++ b/app.go @@ -0,0 +1,209 @@ +package main + +import ( + "database/sql" + "fmt" + "io" + "os" + "time" + _ "time/tzdata" + + _ "github.com/denisenkom/go-mssqldb" + "github.com/urfave/cli" +) + +var app = &cli.App{ + Version: "2023.03.27", + Usage: "bulk loader into Microsoft SQL Server", + HideHelp: true, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "filepath", + Usage: "input file path", + Required: true, + TakesFile: true}, + &cli.StringFlag{ + Name: "type", + Usage: "input file type", + Required: false, + Value: "auto", + }, + &cli.StringFlag{ + Name: "encoding", + Usage: "input file encoding", + Required: false, + Value: "utf8", + }, + &cli.StringFlag{ + Name: "sheetname", + Usage: "Excel file sheet name", + Required: false}, + &cli.StringFlag{ + Name: "server", + Usage: "database server address", + Value: "127.0.0.1"}, + &cli.StringFlag{ + Name: "database", + Usage: "database name", + Required: true}, + &cli.StringFlag{ + Name: "table", + Usage: "table name in schema.name format", + Required: true}, + &cli.StringFlag{ + Name: "fields", + Usage: "list of field types in [sifdt ]+ format", + Required: true}, + &cli.BoolFlag{ + Name: "create", + Usage: "create table"}, + &cli.BoolFlag{ + Name: "overwrite", + Usage: "overwrite existing table"}, + &cli.IntFlag{ + Name: "skiprows", + Usage: "number of rows to skip before read header"}, + &cli.BoolFlag{ + Name: "unknowncolumnnames", + Usage: "insert to table with unknown column names", + }, + &cli.StringFlag{ + Name: "timezone", + Usage: "Time zone (IANA Time Zone database format)", + Value: "Local", + }, + &cli.StringFlag{ + Name: "comma", + Usage: "CSV file delimiter", + Value: ",", + }, + &cli.StringFlag{ + Name: "dateformat", + Usage: "date format (Go style)", + Value: "02.01.2006"}, + &cli.StringFlag{ + Name: "timestampformat", + Usage: "timestamp format (Go style)", + Value: "02.01.2006 15:04:05"}, + &cli.StringFlag{ + Name: "decompress", + Usage: "decompressor name for archived files", + }, + &cli.BoolFlag{ + Name: "silent", + Usage: "disable output", + }, + }, + Action: func(c *cli.Context) error { + initLogger(c.Bool("silent")) + + var comma rune + if c.String("comma") == "\\t" { + comma = rune("\t"[0]) + } else { + comma = rune(c.String("comma")[0]) + } + + location, err := time.LoadLocation(c.String("timezone")) + if err != nil { + return fmt.Errorf("parse timezone: %w", err) + } + + options := &Options{ + filePath: c.String("filepath"), + fileType: c.String("type"), + sheetName: c.String("sheetname"), + server: c.String("server"), + database: c.String("database"), + tableName: c.String("table"), + fieldsTypes: c.String("fields"), + create: c.Bool("create"), + overwrite: c.Bool("overwrite"), + skipRows: c.Int("skiprows"), + encoding: c.String("encoding"), + dateFormat: c.String("dateformat"), + timestampFormat: c.String("timestampformat"), + timezone: location, + decompress: c.String("decompress"), + unknownColumnNames: c.Bool("unknowncolumnnames"), + silent: c.Bool("silent"), + comma: comma, + } + + if options.decompress != "" { + var archiveType ArchiveType + err = archiveType.UnmarshalText([]byte(options.decompress)) + if err != nil { + return err + } + + ar, err := archiveType.Open() + if err != nil { + return err + } + + err = ar.Process(options) + if err != nil { + return err + } + } else { + f, err := os.Open(options.filePath) + if err != nil { + return err + } + defer f.Close() + + err = process(f, options) + if err != nil { + return err + } + } + + logger.Print("Complete.") + + return nil + }} + +func process(r io.Reader, options *Options) error { + var fileType FileType + err := fileType.UnmarshalText([]byte(options.fileType)) + if err != nil { + return err + } + + reader, err := fileType.Open(r, options) + if err != nil { + return err + } + defer reader.Close() + + db, err := sql.Open("sqlserver", fmt.Sprintf("sqlserver://%s?database=%s", options.server, options.database)) + if err != nil { + return fmt.Errorf("open database: %w", err) + } + defer db.Close() + + tx, err := db.Begin() + if err != nil { + return fmt.Errorf("begin transaction: %w", err) + } + + err = prepareTable(reader, tx) + if err != nil { + tx.Rollback() + return fmt.Errorf("prepare table: %w", err) + } + + err = insertData(reader, tx) + if err != nil { + tx.Rollback() + return fmt.Errorf("insert data: %w", err) + } + + err = tx.Commit() + if err != nil { + return fmt.Errorf("commit transaction: %w", err) + } + + return nil +} diff --git a/archivetypes.go b/archivetypes.go new file mode 100644 index 0000000..00ffa26 --- /dev/null +++ b/archivetypes.go @@ -0,0 +1,50 @@ +package main + +import ( + "fmt" +) + +type ArchiveType int + +const ( + AutoDetectArchiveType ArchiveType = iota + Zip +) + +type ArchiveProcessor interface { + Process(options *Options) error +} + +func (ft ArchiveType) MarshalText() (text []byte, err error) { + switch ft { + case AutoDetectArchiveType: + return []byte("auto"), nil + case Zip: + return []byte("zip"), nil + } + + return nil, fmt.Errorf("unknown type id = %d", ft) +} + +func (ft ArchiveType) Open() (ArchiveProcessor, error) { + switch ft { + case AutoDetectArchiveType: + case Zip: + return new(ZipReader), nil + } + + return nil, fmt.Errorf("unknown type id = %d", ft) +} + +func (ft *ArchiveType) UnmarshalText(text []byte) error { + switch string(text) { + case "auto": + *ft = AutoDetectArchiveType + return nil + case "zip": + *ft = Zip + return nil + } + + return fmt.Errorf(`unknown format code "%s"`, string(text)) +} diff --git a/charsets.go b/charsets.go new file mode 100644 index 0000000..e359376 --- /dev/null +++ b/charsets.go @@ -0,0 +1,66 @@ +package main + +import ( + "fmt" + "io" + + "github.com/dimchansky/utfbom" + "golang.org/x/text/encoding/charmap" +) + +type Charset interface { + String(string) (string, error) + Reader(io.Reader) io.Reader +} + +type Charsets map[string]Charset + +var charsets = make(Charsets) + +func (c Charsets) Register(name string, charset Charset) { + c[name] = charset +} + +func (c Charsets) DecodeString(name string, input string) (string, error) { + decoder, ok := c[name] + if !ok { + return "", fmt.Errorf("unknown decoder: %s", name) + } + + if decoder == nil { + return input, nil + } + + return decoder.String(input) +} + +func (c Charsets) DecodeReader(name string, input io.Reader) (io.Reader, error) { + decoder, ok := charsets[name] + if !ok { + return nil, fmt.Errorf("unknown decoder: %s", name) + } + + if decoder == nil { + return input, nil + } + + return decoder.Reader(input), nil +} + +func init() { + charsets.Register("utf8", utf8decoder) + charsets.Register("win1251", charmap.Windows1251.NewDecoder()) + charsets.Register("cp866", charmap.CodePage866.NewDecoder()) +} + +type Utf8decoder struct{} + +var utf8decoder = new(Utf8decoder) + +func (d *Utf8decoder) Reader(r io.Reader) io.Reader { + return utfbom.SkipOnly(r) +} + +func (d *Utf8decoder) String(s string) (string, error) { + return s, nil +} diff --git a/fieldtypes.go b/fieldtypes.go new file mode 100644 index 0000000..c816c25 --- /dev/null +++ b/fieldtypes.go @@ -0,0 +1,137 @@ +package main + +import ( + "fmt" + "strconv" + "strings" + "time" +) + +type CustomDateParser interface { + Reader + ParseDate(rawValue string) (time.Time, error) +} + +type CustomDateTimeParser interface { + Reader + ParseDateTime(rawValue string) (time.Time, error) +} + +type FieldType int + +const ( + Skip FieldType = iota + Integer + String + Float + Money + Date + Timestamp +) + +func (ft FieldType) ParseValue(reader Reader, s string) (any, error) { + s = strings.TrimSpace(s) + + if s == "" { + return nil, nil + } + + switch ft { + case String: + return s, nil + case Integer: + return strconv.ParseInt(s, 10, 64) + case Float: + return strconv.ParseFloat(strings.ReplaceAll(s, ",", "."), 64) + case Date: + if i, ok := reader.(CustomDateParser); ok { + t, err := i.ParseDate(s) + if err != nil { + return nil, err + } + return /*t.Truncate(24 * time.Hour)*/ t, nil // TODO: проверить, нужен ли Truncate + } + + return time.ParseInLocation(reader.Options().dateFormat, s, reader.Options().timezone) + case Timestamp: + if i, ok := reader.(CustomDateTimeParser); ok { + t, err := i.ParseDateTime(s) + if err != nil { + return nil, err + } + return t.Truncate(24 * time.Second), nil + } + + return time.ParseInLocation(reader.Options().timestampFormat, s, reader.Options().timezone) + } + + return nil, fmt.Errorf("unknown type id = %d", ft) +} + +func (ft FieldType) SqlFieldType() string { + switch ft { + case Integer: + return "bigint" + case String: + return "nvarchar(255)" + case Float: + return "float" + case Money: + panic("do not implemented - see https://github.com/denisenkom/go-mssqldb/issues/460") // TODO: https://github.com/denisenkom/go-mssqldb/issues/460 + case Date: + return "date" + case Timestamp: + return "datetime2" + } + + return "" +} + +func (ft FieldType) MarshalText() (text []byte, err error) { + switch ft { + case Skip: + return []byte(" "), nil + case Integer: + return []byte("i"), nil + case String: + return []byte("s"), nil + case Float: + return []byte("f"), nil + case Money: + return []byte("m"), nil + case Date: + return []byte("d"), nil + case Timestamp: + return []byte("t"), nil + } + + return nil, fmt.Errorf("unknown type id = %d", ft) +} + +func (ft *FieldType) UnmarshalText(text []byte) error { + switch string(text) { + case " ": + *ft = Skip + return nil + case "i": + *ft = Integer + return nil + case "s": + *ft = String + return nil + case "f": + *ft = Float + return nil + case "m": + *ft = Money + return nil + case "d": + *ft = Date + return nil + case "t": + *ft = Timestamp + return nil + } + + return fmt.Errorf(`unknown format code "%s"`, string(text)) +} diff --git a/filetypes.go b/filetypes.go new file mode 100644 index 0000000..54e1b6b --- /dev/null +++ b/filetypes.go @@ -0,0 +1,63 @@ +package main + +import ( + "fmt" + "io" +) + +type FileType int + +const ( + AutoDetect FileType = iota + Csv + Xlsx + Dbf +) + +func (ft FileType) MarshalText() (text []byte, err error) { + switch ft { + case AutoDetect: + return []byte("auto"), nil + case Csv: + return []byte("csv"), nil + case Xlsx: + return []byte("xlsx"), nil + case Dbf: + return []byte("dbf"), nil + } + + return nil, fmt.Errorf("unknown type id = %d", ft) +} + +func (ft FileType) Open(r io.Reader, options *Options) (Reader, error) { + switch ft { + case AutoDetect: + case Csv: + return newCsvReader(r, options) + case Xlsx: + return newXlsxReader(r, options) + case Dbf: + return newDbfReader(r, options) + } + + return nil, fmt.Errorf("unknown type id = %d", ft) +} + +func (ft *FileType) UnmarshalText(text []byte) error { + switch string(text) { + case "auto": + *ft = AutoDetect + return nil + case "csv": + *ft = Csv + return nil + case "xlsx": + *ft = Xlsx + return nil + case "dbf": + *ft = Dbf + return nil + } + + return fmt.Errorf(`unknown format code "%s"`, string(text)) +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..212351d --- /dev/null +++ b/go.mod @@ -0,0 +1,30 @@ +module github.com/nxshock/mssqlbulkloader + +go 1.20 + +require ( + github.com/SebastiaanKlippert/go-foxpro-dbf v1.2.0 + github.com/denisenkom/go-mssqldb v0.12.3 + github.com/dimchansky/utfbom v1.1.1 + github.com/stretchr/testify v1.8.2 + github.com/urfave/cli v1.22.12 + github.com/xuri/excelize/v2 v2.7.0 + golang.org/x/text v0.8.0 +) + +require ( + github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 // indirect + github.com/golang-sql/sqlexp v0.1.0 // indirect + github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/richardlehane/mscfb v1.0.4 // indirect + github.com/richardlehane/msoleps v1.0.3 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 // indirect + github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 // indirect + golang.org/x/crypto v0.7.0 // indirect + golang.org/x/net v0.8.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ef0e636 --- /dev/null +++ b/go.sum @@ -0,0 +1,104 @@ +github.com/Azure/azure-sdk-for-go/sdk/azcore v0.19.0/go.mod h1:h6H6c8enJmmocHUbLiiGY6sx7f9i+X3m1CHdd5c6Rdw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v0.11.0/go.mod h1:HcM1YX14R7CJcghJGOYCgdezslRSVzqwLf/q+4Y2r/0= +github.com/Azure/azure-sdk-for-go/sdk/internal v0.7.0/go.mod h1:yqy467j36fJxcRV2TzfVZ1pCb5vxm4BtZPUdYWe/Xo8= +github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/SebastiaanKlippert/go-foxpro-dbf v1.2.0 h1:11OnIKzaY952Atj9pLewuG09DdRv6CCm2XnZaTEcWn0= +github.com/SebastiaanKlippert/go-foxpro-dbf v1.2.0/go.mod h1:VnyVS1nyFfnCduBoWvjYuRp5Ce3KqZThky+ECDvJmEA= +github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/denisenkom/go-mssqldb v0.12.3 h1:pBSGx9Tq67pBOTLmxNuirNTeB8Vjmf886Kx+8Y+8shw= +github.com/denisenkom/go-mssqldb v0.12.3/go.mod h1:k0mtMFOnU+AihqFxPMiF05rtiDrorD1Vrm1KEz5hxDo= +github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U= +github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE= +github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= +github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= +github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA= +github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= +github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= +github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI= +github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5/go.mod h1:caMODM3PzxT8aQXRPkAt8xlV/e7d7w8GM5g0fa5F0D8= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4/go.mod h1:4OwLy04Bl9Ef3GJJCoec+30X3LQs/0/m4HFRt/2LUSA= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/richardlehane/mscfb v1.0.4 h1:WULscsljNPConisD5hR0+OyZjwK46Pfyr6mPu5ZawpM= +github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk= +github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN0AQoVM= +github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/urfave/cli v1.22.12 h1:igJgVw1JdKH+trcLWLeLwZjU9fEfPesQ+9/e4MQ44S8= +github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 h1:6932x8ltq1w4utjmfMPVj09jdMlkY0aiA6+Skbtl3/c= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI= +github.com/xuri/excelize/v2 v2.7.0 h1:Hri/czwyRCW6f6zrCDWXcXKshlq4xAZNpNOpdfnFhEw= +github.com/xuri/excelize/v2 v2.7.0/go.mod h1:ebKlRoS+rGyLMyUx3ErBECXs/HNYqyj+PbkkKRK5vSI= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 h1:OAmKAfT06//esDdpi/DZ8Qsdt4+M5+ltca05dA5bG2M= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU= +golang.org/x/crypto v0.7.0 h1:AvwMYaRytfdeVt3u6mLaxYtErKYjxA2OXjJ1HHq6t3A= +golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69 h1:Lj6HJGCSn5AjxRAH2+r35Mir4icalbqku+CLUtjnvXY= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= +golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= +golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= +golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/logger.go b/logger.go new file mode 100644 index 0000000..23dc2b8 --- /dev/null +++ b/logger.go @@ -0,0 +1,21 @@ +package main + +import ( + "io" + "log" + "os" +) + +type Logger struct { + silent bool +} + +var logger *log.Logger + +func initLogger(silent bool) { + if silent { + logger = log.New(io.Discard, "", 0) + } else { + logger = log.New(os.Stderr, "", 0) + } +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..e1cb2f3 --- /dev/null +++ b/main.go @@ -0,0 +1,17 @@ +package main + +import ( + "log" + "os" +) + +func init() { + log.SetFlags(0) +} + +func main() { + err := app.Run(os.Args) + if err != nil { + log.Fatalln(err) + } +} diff --git a/make.bat b/make.bat new file mode 100644 index 0000000..5b21e00 --- /dev/null +++ b/make.bat @@ -0,0 +1 @@ +go build -trimpath -buildmode=pie -ldflags "-linkmode external -s -w" \ No newline at end of file diff --git a/options.go b/options.go new file mode 100644 index 0000000..27dd2e9 --- /dev/null +++ b/options.go @@ -0,0 +1,62 @@ +package main + +import "time" + +type Options struct { + // Source file path + filePath string + + // Source file type + fileType string + + // Server address + server string + + // Database name + database string + + // Table name + tableName string + + // comma delimiter for CSV files + comma rune + + // Number of rows to skip before reading of header + skipRows int + + // List of fiels types + fieldsTypes string + + // Date format + dateFormat string + + // Date+time format + timestampFormat string + + // Sheet name for Excel file + sheetName string + + // CSV/DBF codepage + encoding string + + // create table before inserting data + create bool + + // Drop existing table before creating + overwrite bool + + // Disable progress output + silent bool + + // Input file dates timezone + timezone *time.Location + + // Decompress before process + decompress string + + // Unknown column names + unknownColumnNames bool + + // Column names list + columnNames []string +} diff --git a/reader.go b/reader.go new file mode 100644 index 0000000..d4fcbaa --- /dev/null +++ b/reader.go @@ -0,0 +1,28 @@ +package main + +type Reader interface { + // GetHeaders returns list of column names + GetHeader() []string + + // GetRows returns next one file row or io.EOF + GetRow(asString bool) ([]any, error) + + // Options returns options + Options() *Options + + Close() error +} + +func getHeader(r Reader) ([]string, error) { + headerAny, err := r.GetRow(true) + if err != nil { + return nil, err + } + + header := make([]string, 0, len(headerAny)) + for _, v := range headerAny { + header = append(header, v.(string)) + } + + return header, nil +} diff --git a/readercsv.go b/readercsv.go new file mode 100644 index 0000000..07e7645 --- /dev/null +++ b/readercsv.go @@ -0,0 +1,101 @@ +package main + +import ( + "bufio" + "encoding/csv" + "fmt" + "io" +) + +type CsvReader struct { + reader *csv.Reader + header []string + options *Options +} + +func NewCsvReader(r io.Reader, options *Options) (*CsvReader, error) { + return newCsvReader(r, options) +} + +func newCsvReader(r io.Reader, options *Options) (*CsvReader, error) { + decoder, err := charsets.DecodeReader(options.encoding, r) + if err != nil { + return nil, fmt.Errorf("enable decoder: %v", options.encoding) + } + + bufReader := bufio.NewReaderSize(decoder, 4*1024*1024) + + for i := 0; i < options.skipRows; i++ { + _, _, err := bufReader.ReadLine() + if err != nil { + return nil, fmt.Errorf("skip rows: %v", err) + } + } + + re := csv.NewReader(bufReader) + re.Comma = options.comma + re.FieldsPerRecord = len(options.fieldsTypes) + + csvReader := &CsvReader{ + reader: re, + options: options} + + header, err := getHeader(csvReader) + if err != nil { + return nil, err + } + + csvReader.header = header + + return csvReader, nil + +} + +func (r *CsvReader) GetHeader() []string { + return r.header +} + +func (r *CsvReader) Options() *Options { + return r.options +} + +func (r *CsvReader) GetRow(asStrings bool) ([]any, error) { + record, err := r.reader.Read() + if err == io.EOF { + return nil, err + } + if err != nil { + return nil, fmt.Errorf("read record: %v", err) + } + + var args []any + + for i, v := range record { + var fieldType FieldType + err = fieldType.UnmarshalText([]byte{r.options.fieldsTypes[i]}) + if err != nil { + return nil, fmt.Errorf("get record type: %v", err) + } + + if fieldType == Skip { + continue + } + + if asStrings { + fieldType = String + } + + parsedValue, err := fieldType.ParseValue(r, v) + if err != nil { + return nil, fmt.Errorf("parse value: %v", err) + } + + args = append(args, parsedValue) + } + + return args, nil +} + +func (r *CsvReader) Close() error { + return nil +} diff --git a/readercsv_test.go b/readercsv_test.go new file mode 100644 index 0000000..f2f6607 --- /dev/null +++ b/readercsv_test.go @@ -0,0 +1,42 @@ +package main + +import ( + "io" + "os" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestCsvReaderBasic(t *testing.T) { + f, err := os.Open("testdata/csv/9729337841_20032023_084313667.csv") + assert.NoError(t, err) + + options := &Options{ + encoding: "win1251", + comma: rune(";"[0]), + skipRows: 3, + fieldsTypes: "s ttffsssss", + dateFormat: "02.01.2006", + timestampFormat: "02.01.2006 15:04:05", + timezone: time.Local} + + csvReader, err := NewCsvReader(f, options) + assert.NoError(t, err) + + assert.Equal(t, []string{"RRN", "Дата операции", "Дата ПП", "Сумма операции", "Сумма расчета", "Номер карты", "Код авторизации", "Тип операции", "Доп. информация_1", "Доп. информация_2"}, csvReader.GetHeader()) + + row, err := csvReader.GetRow(false) + assert.NoError(t, err) + + t1 := time.Date(2023, 03, 19, 17, 49, 35, 0, time.Local) + t2 := time.Date(2023, 03, 20, 0, 0, 0, 0, time.Local) + assert.Equal(t, []any{"307814009186", t1, t2, 499.00, 488.52, "522598******7141", "REZE64", "Покупка", "35068281112", "307817403283"}, row) + + row, err = csvReader.GetRow(false) + assert.Equal(t, err, io.EOF) + + err = csvReader.Close() + assert.NoError(t, err) +} diff --git a/readerdbf.go b/readerdbf.go new file mode 100644 index 0000000..ea6fe30 --- /dev/null +++ b/readerdbf.go @@ -0,0 +1,124 @@ +package main + +import ( + "bytes" + "fmt" + "io" + "time" + + dbf "github.com/SebastiaanKlippert/go-foxpro-dbf" +) + +func init() { + dbf.SetValidFileVersionFunc(func(version byte) error { + return nil + }) +} + +type DbfReader struct { + reader *dbf.DBF + header []string + options *Options +} + +func NewDbfReader(r io.Reader, options *Options) (*DbfReader, error) { + return newDbfReader(r, options) +} + +func newDbfReader(r io.Reader, options *Options) (*DbfReader, error) { + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + br := bytes.NewReader(b) + + re, err := dbf.OpenStream(br, nil, &dbf.UTF8Decoder{}) + if err != nil { + return nil, err + } + + dbfReader := &DbfReader{ + reader: re, + options: options} + + fullHeader := re.FieldNames() + var header []string + for i, v := range options.fieldsTypes { + if v == ' ' { + continue + } + + s, err := charsets.DecodeString(options.encoding, fullHeader[i]) + if err != nil { + return nil, err + } + + header = append(header, s) + } + + dbfReader.header = header + + return dbfReader, nil +} + +func (r *DbfReader) GetHeader() []string { + return r.header +} + +func (r *DbfReader) Options() *Options { + return r.options +} + +func (r *DbfReader) GetRow(asStrings bool) ([]any, error) { + if r.reader.EOF() { + return nil, io.EOF + } + + record, err := r.reader.Record() + if err != nil { + return nil, fmt.Errorf("read record: %v", err) + } + + r.reader.Skip(1) + + var args []any + + for i, v := range record.FieldSlice() { + var fieldType FieldType + err = fieldType.UnmarshalText([]byte{r.options.fieldsTypes[i]}) + if err != nil { + return nil, fmt.Errorf("get record type: %v", err) + } + + if fieldType == Skip { + continue + } + + decV, err := charsets.DecodeString(r.options.encoding, fmt.Sprint(v)) + if err != nil { + return nil, err + } + + parsedValue, err := fieldType.ParseValue(r, decV) + if err != nil { + return nil, fmt.Errorf("parse value: %v", err) + } + + args = append(args, parsedValue) + } + + return args, nil +} + +func (r *DbfReader) Close() error { + return nil +} + +func (r *DbfReader) ParseDate(rawValue string) (time.Time, error) { + return time.ParseInLocation("02.01.2006", rawValue, r.options.timezone) +} + +func (r *DbfReader) ParseDateTime(rawValue string) (time.Time, error) { + return time.ParseInLocation("02.01.2006 15:04:05", rawValue, r.options.timezone) +} diff --git a/readerdbf_test.go b/readerdbf_test.go new file mode 100644 index 0000000..18753c8 --- /dev/null +++ b/readerdbf_test.go @@ -0,0 +1,38 @@ +package main + +import ( + "io" + "os" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestDbfReaderBasic(t *testing.T) { + f, err := os.Open("testdata/dbf/38_052QB.dbf") + assert.NoError(t, err) + + options := &Options{ + fieldsTypes: "sssssstdffsss", + timezone: time.Local, + encoding: "cp866"} + + dbfReader, err := NewDbfReader(f, options) + assert.NoError(t, err) + + assert.Equal(t, []string{"TRAN_ID", "БАНК", "ОТДЕЛЕНИЕ", "ТОЧКА", "НАЗВАНИЕ", "ТЕРМИНАЛ", "ДАТА_ТРАН", "ДАТА_РАСЧ", "СУММА_ТРАН", "СУММА_РАСЧ", "КАРТА", "КОД_АВТ", "ТИП"}, dbfReader.GetHeader()) + + row, err := dbfReader.GetRow(false) + assert.NoError(t, err) + + t1 := time.Date(2023, 02, 20, 5, 57, 12, 0, time.Local) + t2 := time.Date(2023, 02, 21, 0, 0, 0, 0, time.Local) + assert.Equal(t, []any{"719089383780", "44", "8644", "570000009312", "STOLOVAYA TSPP", "844417", t1, t2, 1757.08, 1713.15, "536829XXXXXX9388", "UM1TS8", "D"}, row) + + row, err = dbfReader.GetRow(false) + assert.Equal(t, err, io.EOF) + + err = dbfReader.Close() + assert.NoError(t, err) +} diff --git a/readerxlsx.go b/readerxlsx.go new file mode 100644 index 0000000..4b094ba --- /dev/null +++ b/readerxlsx.go @@ -0,0 +1,159 @@ +package main + +import ( + "errors" + "fmt" + "io" + "strconv" + "time" + + "github.com/xuri/excelize/v2" +) + +type XlsxReader struct { + streamReader *excelize.File + rows *excelize.Rows + header []string + options *Options +} + +func NewXlsxReader(r io.Reader, options *Options) (*XlsxReader, error) { + return newXlsxReader(r, options) +} + +func newXlsxReader(r io.Reader, options *Options) (*XlsxReader, error) { + streamReader, err := excelize.OpenReader(r) + if err != nil { + return nil, fmt.Errorf("open reader: %w", err) + } + + sheetName := options.sheetName + if sheetName == "" { + if len(streamReader.GetSheetList()) == 0 { + streamReader.Close() + return nil, fmt.Errorf("get sheet list: %w", errors.New("file does not contains any sheets")) + } + sheetName = streamReader.GetSheetList()[0] + } + + rows, err := streamReader.Rows(sheetName) + if err != nil { + streamReader.Close() + return nil, fmt.Errorf("read rows: %w", err) + } + + xlsxReader := &XlsxReader{ + streamReader: streamReader, + options: options, + rows: rows} + + for i := 0; i < options.skipRows; i++ { + _, err := xlsxReader.GetRow(true) + if err != nil { + streamReader.Close() + return nil, fmt.Errorf("skip rows: %w", err) + } + } + + header, err := getHeader(xlsxReader) + if err != nil { + streamReader.Close() + return nil, fmt.Errorf("read header: %w", err) + } + xlsxReader.header = header + + return xlsxReader, nil + +} + +func (r *XlsxReader) GetHeader() []string { + return r.header +} + +func (r *XlsxReader) Options() *Options { + return r.options +} + +func (r *XlsxReader) GetRow(asStrings bool) ([]any, error) { + end := !r.rows.Next() + if end { + return nil, io.EOF + } + + record, err := r.rows.Columns(excelize.Options{RawCellValue: true}) + if err != nil { + return nil, err + } + + var args []any + + for i, v := range record { + var fieldType FieldType + err = fieldType.UnmarshalText([]byte{r.options.fieldsTypes[i]}) + if err != nil { + return nil, fmt.Errorf("get record type: %v", err) + } + + if fieldType == Skip { + continue + } + if asStrings { + fieldType = String + } + + parsedValue, err := fieldType.ParseValue(r, v) + if err != nil { + return nil, fmt.Errorf("parse value: %v", err) + } + + args = append(args, parsedValue) + } + + return args, nil +} + +func (r *XlsxReader) Close() error { + err := r.rows.Close() + if err != nil { + return err + } + + err = r.streamReader.Close() + if err != nil { + return err + } + + return nil +} + +func (r *XlsxReader) ParseDate(rawValue string) (time.Time, error) { + f, err := strconv.ParseFloat(rawValue, 64) + if err != nil { + return time.Time{}, err + } + + t, err := excelize.ExcelDateToTime(f, false) + if err != nil { + return time.Time{}, err + } + + t = time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), r.options.timezone) + + return t, nil +} + +func (r *XlsxReader) ParseDateTime(rawValue string) (time.Time, error) { + f, err := strconv.ParseFloat(rawValue, 64) + if err != nil { + return time.Time{}, err + } + + t, err := excelize.ExcelDateToTime(f, false) + if err != nil { + return time.Time{}, err + } + + t = time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), r.options.timezone) + + return t, nil +} diff --git a/readerxlsx_test.go b/readerxlsx_test.go new file mode 100644 index 0000000..3bb56c6 --- /dev/null +++ b/readerxlsx_test.go @@ -0,0 +1,36 @@ +package main + +import ( + "io" + "os" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestXlsxReaderBasic(t *testing.T) { + f, err := os.Open("testdata/xlsx/38_049RMZ_all.xlsx") + assert.NoError(t, err) + + options := &Options{skipRows: 0, fieldsTypes: "s sssssssssttfffssss", timezone: time.Local} + + xlsxReader, err := NewXlsxReader(f, options) + assert.NoError(t, err) + + assert.Equal(t, []string{"ИНН предприятия", "Город", "Адрес ТСТ", "Обслуживающее отделение", "Расчетное отделение", "RRN операции", "Название ТСТ", "Мерчант ТСТ", "Расчетный мерчант", "Терминал", "Дата проведения операции", "Дата обработки операции", "Сумма операции", "Комиссия за операцию", "Сумма к расчету", "Карта", "Код авторизации", "Тип операции", "Тип карты"}, xlsxReader.GetHeader()) + + row, err := xlsxReader.GetRow(false) + assert.NoError(t, err) + + t1 := time.Date(2023, 02, 17, 1, 5, 12, 0, time.Local) + t2 := time.Date(2023, 02, 18, 6, 24, 24, 0, time.Local) // TODO: в excel-файле 37 секунд? + + assert.Equal(t, []any{"7710146208", nil, nil, "99386901", "99386901", "304722813269", "TSENTRALNYY TELEGRAF", "780000334079", "780000334079", "10432641", t1, t2, 50.00, 0.80, 49.20, "553691******1214", "026094", "D", "MC OTHER"}, row) + + row, err = xlsxReader.GetRow(false) + assert.Equal(t, io.EOF, err) + + err = xlsxReader.Close() + assert.NoError(t, err) +} diff --git a/sql.go b/sql.go new file mode 100644 index 0000000..614c9a5 --- /dev/null +++ b/sql.go @@ -0,0 +1,151 @@ +package main + +import ( + "database/sql" + "fmt" + "io" + "os" + "strings" + + mssql "github.com/denisenkom/go-mssqldb" +) + +// TODO: add escaping +func prepareTable(reader Reader, tx *sql.Tx) error { + if reader.Options().unknownColumnNames { + var columnNames []string + + sql := fmt.Sprintf("SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA + '.' + TABLE_NAME = '%s' ORDER BY ORDINAL_POSITION", reader.Options().tableName) + rows, err := tx.Query(sql) + if err != nil { + return fmt.Errorf("get column names from database: %w", err) + } + defer rows.Close() + + for rows.Next() { + if rows.Err() != nil { + return fmt.Errorf("get column names from database: %w", err) + } + var columnName string + err = rows.Scan(&columnName) + if err != nil { + return fmt.Errorf("get column names from database: %w", err) + } + columnNames = append(columnNames, columnName) + } + + reader.Options().columnNames = columnNames + } else { + reader.Options().columnNames = reader.GetHeader() + } + + if !reader.Options().create && !reader.Options().overwrite { + return nil + } + + if !reader.Options().create && reader.Options().overwrite { + logger.Println("Truncating table...") + _, err := tx.Exec(fmt.Sprintf("TRUNCATE TABLE %s", reader.Options().tableName)) + if err != nil { + return err + } + } + + if reader.Options().overwrite { + logger.Println("Dropping table...") + _, err := tx.Exec(fmt.Sprintf("IF object_id('%s', 'U') IS NOT NULL DROP TABLE %s", reader.Options().tableName, reader.Options().tableName)) + if err != nil { + return fmt.Errorf("drop table: %w", err) + } + } + + sql := fmt.Sprintf("CREATE TABLE %s (", reader.Options().tableName) + + fieldTypes := strings.ReplaceAll(reader.Options().fieldsTypes, " ", "") + + for i, columnName := range reader.Options().columnNames { + var fieldType FieldType + err := fieldType.UnmarshalText([]byte{fieldTypes[i]}) + if err != nil { + return fmt.Errorf("detect field type: %w", err) + } + + sql += fmt.Sprintf(`"%s" %s`, columnName, fieldType.SqlFieldType()) + + if i+1 < len(reader.GetHeader()) { + sql += ", " + } else { + sql += ") WITH (DATA_COMPRESSION = PAGE)" // TODO: add optional params + } + } + + logger.Println("Creating table...") + logger.Println(sql) + _, err := tx.Exec(sql) + if err != nil { + return fmt.Errorf("execute table creation: %w", err) + } + + return nil +} + +func insertData(reader Reader, tx *sql.Tx) error { + columnNames := reader.GetHeader() + if reader.Options().unknownColumnNames { + columnNames = reader.Options().columnNames + } + + sql := mssql.CopyIn(reader.Options().tableName, mssql.BulkOptions{Tablock: true}, columnNames...) + + stmt, err := tx.Prepare(sql) + if err != nil { + _ = tx.Rollback() + return fmt.Errorf("prepare statement: %w", err) + } + + n := 0 + for { + if n%100000 == 0 { + if !reader.Options().silent { + fmt.Fprintf(os.Stderr, "Processed %d records...\r", n) + } + } + + record, err := reader.GetRow(false) + if err == io.EOF { + break + } + if err != nil { + return fmt.Errorf("read record: %w", err) + } + + _, err = stmt.Exec(record...) + if err != nil { + _ = stmt.Close() + _ = tx.Rollback() + return fmt.Errorf("execute statement: %w", err) + } + n++ + } + result, err := stmt.Exec() + if err != nil { + _ = tx.Rollback() + return fmt.Errorf("execute statement: %w", err) + } + rowsAffected, err := result.RowsAffected() + if err != nil { + _ = tx.Rollback() + return fmt.Errorf("calc rows affected: %w", err) + } + if !reader.Options().silent { + fmt.Fprintf(os.Stderr, "Processed %d records. \n", rowsAffected) + } + + err = stmt.Close() + if err != nil { + _ = tx.Rollback() + return fmt.Errorf("close statement: %w", err) + } + + return nil +} diff --git a/testdata/csv/9729337841_20032023_084313667.csv b/testdata/csv/9729337841_20032023_084313667.csv new file mode 100644 index 0000000..634ac4b --- /dev/null +++ b/testdata/csv/9729337841_20032023_084313667.csv @@ -0,0 +1,5 @@ +skip +skip +skip +RRN; ;; ; ; ; ; ; ; ; ; ; ;. _1;. _2 +307814009186; ; ;781000815902;WINK;28403560;19.03.2023 17:49:35;20.03.2023 00:00:00;499,00;488,52;522598******7141;REZE64;;35068281112;307817403283 diff --git a/testdata/dbf/38_052QB.dbf b/testdata/dbf/38_052QB.dbf new file mode 100644 index 0000000..fd127ab Binary files /dev/null and b/testdata/dbf/38_052QB.dbf differ diff --git a/testdata/xlsx/38_049RMZ_all.xlsx b/testdata/xlsx/38_049RMZ_all.xlsx new file mode 100644 index 0000000..bae2e0f Binary files /dev/null and b/testdata/xlsx/38_049RMZ_all.xlsx differ diff --git a/zipreader.go b/zipreader.go new file mode 100644 index 0000000..a303eae --- /dev/null +++ b/zipreader.go @@ -0,0 +1,33 @@ +package main + +import ( + "archive/zip" + "io" +) + +type ProcessFunc func(io.Reader, *Options) error + +type ZipReader struct{} + +func (zr *ZipReader) Process(options *Options) error { + z, err := zip.OpenReader(options.filePath) + if err != nil { + return err + } + defer z.Close() + + for _, zFile := range z.File { + f, err := zFile.Open() + if err != nil { + return err + } + defer f.Close() + + err = process(f, options) + if err != nil { + return err + } + } + + return nil +}