package main
import (
"fmt"
"os"
"log"
"strconv"
"image/color"
"github.com/go-gota/gota/dataframe"
"github.com/go-gota/gota/series"
"gonum.org/v1/plot"
"gonum.org/v1/plot/plotter"
"gonum.org/v1/plot/plotutil"
"gonum.org/v1/plot/vg"
"gonum.org/v1/plot/vg/draw"
)
// Preprocess returns the result of dropping the "petal_length" and
// "petal_width" columns from df.
//
// On the iris data set this is intended to leave "sepal_length",
// "sepal_width" and "species" (the original author noted df.Select on those
// three names as an alternative).
func Preprocess(df dataframe.DataFrame) dataframe.DataFrame {
	petalColumns := []string{"petal_length", "petal_width"}
	return df.Drop(petalColumns)
}
// SeparateData returns the rows of df whose "species" column equals specie
// (for example "setosa").
func SeparateData(df dataframe.DataFrame, specie string) dataframe.DataFrame {
	bySpecies := dataframe.F{
		Colname:    "species",
		Comparator: series.Eq,
		Comparando: specie,
	}
	return df.Filter(bySpecies)
}
// makeScatterPlot builds a scatter plotter from the first two columns of df.
//
// The first record returned by df.Records() is treated as the column-name
// header and skipped. For every remaining record, column 0 is parsed as the X
// coordinate and column 1 as the Y coordinate.
//
// The function never returns an error: a record with fewer than two columns,
// a cell that is not a valid float64, or a failure in plotter.NewScatter
// terminates the program through log.Fatal.
func makeScatterPlot(df dataframe.DataFrame) *plotter.Scatter {
	records := df.Records()
	if len(records) == 0 {
		// Without even a header row there is nothing to slice off below.
		log.Fatal("makeScatterPlot: data frame returned no records")
	}

	rows := records[1:] // drop the header row
	pts := make(plotter.XYs, len(rows))
	for i, r := range rows {
		if len(r) < 2 {
			log.Fatalf("makeScatterPlot: record %d has %d columns, want at least 2", i+1, len(r))
		}
		// Parse errors are reported instead of silently plotting the value as 0.
		x, err := strconv.ParseFloat(r[0], 64)
		if err != nil {
			log.Fatalf("makeScatterPlot: record %d, column 0: %v", i+1, err)
		}
		y, err := strconv.ParseFloat(r[1], 64)
		if err != nil {
			log.Fatalf("makeScatterPlot: record %d, column 1: %v", i+1, err)
		}
		pts[i].X, pts[i].Y = x, y
	}

	s, err := plotter.NewScatter(pts)
	if err != nil {
		log.Fatal(err)
	}
	return s
}
// makeHistValues builds two histograms from df: the first from column 0 and
// the second from column 1, each with bin bins.
//
// The first record returned by df.Records() is treated as the column-name
// header and skipped; every remaining record contributes one value to each
// histogram.
//
// The function never returns an error: a record with fewer than two columns,
// a cell that is not a valid float64, or a failure in plotter.NewHist
// terminates the program through log.Fatal.
func makeHistValues(df dataframe.DataFrame, bin int) (*plotter.Histogram, *plotter.Histogram) {
	records := df.Records()
	if len(records) == 0 {
		// Without even a header row there is nothing to slice off below.
		log.Fatal("makeHistValues: data frame returned no records")
	}

	rows := records[1:] // drop the header row
	first := make(plotter.Values, len(rows))
	second := make(plotter.Values, len(rows))
	for i, r := range rows {
		if len(r) < 2 {
			log.Fatalf("makeHistValues: record %d has %d columns, want at least 2", i+1, len(r))
		}
		// Parse errors are reported instead of silently counting the value as 0.
		v0, err := strconv.ParseFloat(r[0], 64)
		if err != nil {
			log.Fatalf("makeHistValues: record %d, column 0: %v", i+1, err)
		}
		v1, err := strconv.ParseFloat(r[1], 64)
		if err != nil {
			log.Fatalf("makeHistValues: record %d, column 1: %v", i+1, err)
		}
		first[i], second[i] = v0, v1
	}

	h1, err := plotter.NewHist(first, bin)
	if err != nil {
		log.Fatal(err)
	}
	h2, err := plotter.NewHist(second, bin)
	if err != nil {
		log.Fatal(err)
	}
	return h1, h2
}
// SaveScatterPlot draws df1, df2 and df3 as three scatter series on one plot
// and writes it to "./images/ScatterPlot.png" as a 4x4 inch image.
//
// species supplies the legend labels: species[0] labels df1, species[1]
// labels df2 and species[2] labels df3, so it must hold at least three
// entries. A failure to save the image terminates the program via log.Fatal.
func SaveScatterPlot(df1, df2, df3 dataframe.DataFrame, species []string) {
	p := plot.New()
	p.Title.Text = "Sepal length & width"
	p.X.Label.Text = "length"
	p.Y.Label.Text = "width"
	p.Add(plotter.NewGrid())

	// One scatter per data frame, in argument order.
	scatters := []*plotter.Scatter{
		makeScatterPlot(df1),
		makeScatterPlot(df2),
		makeScatterPlot(df3),
	}

	// Each series gets its own glyph shape ...
	scatters[0].Shape = &draw.CircleGlyph{}
	scatters[1].Shape = &draw.PyramidGlyph{}
	scatters[2].Shape = &draw.BoxGlyph{}

	// ... and its own colour from the plotutil palette, indexed by position.
	for i, sc := range scatters {
		sc.GlyphStyle.Color = plotutil.Color(i)
		p.Add(sc)
	}
	for i, sc := range scatters {
		p.Legend.Add(species[i], sc)
	}

	// Axis ranges are not set explicitly here.
	side := 4 * vg.Inch
	err := p.Save(side, side, "./images/ScatterPlot.png")
	if err != nil {
		log.Fatal(err)
	}
}
// SaveHistogram draws one normalized histogram per data frame on a shared
// plot and writes it to "./images/Histogram.png" as a 4x4 inch image.
//
// Only the first histogram returned by makeHistValues (built from column 0)
// is used; the second one is discarded. species supplies the legend labels
// for df1, df2 and df3 in that order, so it must hold at least three entries.
// A failure to save the image terminates the program via log.Fatal.
func SaveHistogram(df1, df2, df3 dataframe.DataFrame, species []string) {
	p := plot.New()
	p.Title.Text = "Sepal Histogram"
	p.X.Label.Text = "length"

	const bins = 10
	frames := []dataframe.DataFrame{df1, df2, df3}
	// Half-transparent fills, one per data frame.
	fills := []color.RGBA{
		{R: 128, G: 255, B: 255, A: 128},
		{R: 255, G: 128, B: 255, A: 128},
		{R: 255, G: 255, B: 128, A: 128},
	}

	hists := make([]*plotter.Histogram, len(frames))
	for i, frame := range frames {
		// The second result is the column-1 histogram, which is not plotted.
		h, _ := makeHistValues(frame, bins)
		hists[i] = h
	}

	// Style every histogram before it is handed to the plot.
	for i, h := range hists {
		h.Normalize(1)
		h.FillColor = fills[i]
		h.LineStyle.Width = vg.Length(0.5)
	}

	p.Add(hists[0], hists[1], hists[2])
	for i, h := range hists {
		p.Legend.Add(species[i], h)
	}
	p.Legend.Top = true

	side := 4 * vg.Inch
	err := p.Save(side, side, "./images/Histogram.png")
	if err != nil {
		log.Fatal(err)
	}
}
// main loads "./files/iris.csv", keeps the sepal columns and the species,
// splits the rows by species and writes a scatter plot and a histogram under
// "./images/".
//
// gota reports failures through the Err field of the returned DataFrame
// rather than a separate error value, so that field is checked after every
// data-frame operation; any failure terminates the program via log.Fatal.
func main() {
	// Read file
	f, err := os.Open("./files/iris.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// File to data frame
	df := dataframe.ReadCSV(f)
	if df.Err != nil {
		log.Fatalf("reading ./files/iris.csv: %v", df.Err)
	}
	fmt.Println(df)
	fmt.Println(df.Describe())

	// Drop the petal columns.
	sepal := Preprocess(df)
	if sepal.Err != nil {
		log.Fatalf("dropping petal columns: %v", sepal.Err)
	}
	fmt.Println(sepal)

	// Separate data by species
	species := []string{"setosa", "versicolor", "virginica"}
	seDF := SeparateData(sepal, species[0])
	veDF := SeparateData(sepal, species[1])
	viDF := SeparateData(sepal, species[2])
	for i, part := range []dataframe.DataFrame{seDF, veDF, viDF} {
		if part.Err != nil {
			log.Fatalf("filtering species %q: %v", species[i], part.Err)
		}
	}
	fmt.Println(viDF)

	// Save the plots to PNG files
	SaveScatterPlot(seDF, veDF, viDF, species)
	SaveHistogram(seDF, veDF, viDF, species)
}