promodj/parser.go

167 lines
3.3 KiB
Go
Raw Normal View History

2021-08-14 09:59:41 +05:00
package main
import (
"bytes"
"fmt"
"net/url"
2021-11-02 16:13:23 +05:00
"strconv"
2021-08-14 09:59:41 +05:00
"strings"
"github.com/PuerkitoBio/goquery"
2022-01-25 22:34:59 +05:00
"github.com/nxshock/gwp"
2021-08-14 09:59:41 +05:00
)
// TrackInfo represents track information.
type TrackInfo struct {
// Title is the track title; parsePage fills it with the text of the track link.
Title string
// Url is the address of the track file; parsePage fills it with a
// https://promodj.com/download/... link.
Url string
}
// Genre represents genre information.
type Genre struct {
// Name is the genre name as shown in the genre link text.
Name string
// Code is the genre link's href with the "/music/" prefix trimmed.
Code string
}
// Genres holds the cached list of available genres. It is assigned by
// UpdateGenres and is nil until that function has been called.
var Genres []Genre
// UpdateGenres refreshes the package-level Genres cache with the list
// returned by updateGenreList.
//
// The cache is replaced only when the refresh succeeds. On failure the
// previously cached list is left untouched and the underlying error is
// returned wrapped, so a transient fetch problem does not wipe a good list.
func UpdateGenres() error {
	genres, err := updateGenreList()
	if err != nil {
		return fmt.Errorf("get genres list failed: %w", err)
	}

	Genres = genres
	return nil
}
// updateGenreList loads https://promodj.com/music and returns one Genre per
// link found under "div.styles_tagcloud". The link text becomes the genre
// name and the link's href, with the "/music/" prefix trimmed, becomes its
// code. The error from loading the document is returned unchanged.
func updateGenreList() ([]Genre, error) {
	const musicPageURL = "https://promodj.com/music"

	doc, err := goquery.NewDocument(musicPageURL)
	if err != nil {
		return nil, err
	}

	var genres []Genre
	links := doc.Find("div.styles_tagcloud > a")
	links.Each(func(_ int, link *goquery.Selection) {
		href := link.AttrOr("href", "")
		genres = append(genres, Genre{
			Name: link.Text(),
			Code: strings.TrimPrefix(href, "/music/"),
		})
	})

	return genres, nil
}
2022-01-25 22:34:59 +05:00
// parsePage loads the listing page at url and sends one TrackInfo to
// resultsChan for every track link ("div.title > a.invert") it finds.
//
// A link is used only if it has an href that splits on "/" into exactly
// seven fields; the sixth and seventh fields are used to build the download
// address. If the page cannot be loaded the function returns without sending
// anything. The channel is never closed here.
func parsePage(url string, resultsChan chan TrackInfo) {
	doc, err := goquery.NewDocument(url)
	if err != nil {
		return
	}

	trackLinks := doc.Find("div.title > a.invert")
	trackLinks.Each(func(_ int, link *goquery.Selection) {
		href, ok := link.Attr("href")
		if !ok {
			return
		}

		parts := strings.Split(href, "/")
		if len(parts) != 7 {
			return
		}

		resultsChan <- TrackInfo{
			Title: link.Text(),
			Url:   fmt.Sprintf("https://promodj.com/download/%s/%s.mp3", parts[5], parts[6]),
		}
	})
}
2021-08-14 09:59:41 +05:00
// tracksByGenre возвращает список треков по указанному жанру
2021-11-02 16:13:23 +05:00
func tracksByGenre(genre string, params url.Values) ([]TrackInfo, error) {
if params == nil {
params = url.Values{"download": []string{"1"}}
} else {
params.Set("download", "1") // only available tracks
}
2021-08-14 09:59:41 +05:00
var result []TrackInfo
2022-01-25 22:34:59 +05:00
resultsChan := make(chan TrackInfo)
2021-08-14 09:59:41 +05:00
2022-01-25 22:34:59 +05:00
wp := gwp.New(4)
2021-08-14 09:59:41 +05:00
2022-01-25 22:34:59 +05:00
go func() {
for i := 1; i <= 50; i++ {
params.Set("page", strconv.Itoa(i))
url := constructUrl(genre, params)
2021-08-14 09:59:41 +05:00
2022-01-25 22:34:59 +05:00
wp.Add(func() error {
parsePage(url, resultsChan)
return nil
2021-08-14 09:59:41 +05:00
})
2022-01-25 22:34:59 +05:00
}
wp.CloseAndWait()
close(resultsChan)
}()
for trackInfo := range resultsChan {
result = append(result, trackInfo)
2021-08-14 09:59:41 +05:00
}
result = removeDuplicate(result)
return result, nil
}
// tracksToM3u returns the bytes of an M3U playlist generated from the given
// list of tracks.
//
// The output starts with a UTF-8 byte order mark and the "#EXTM3U" header.
// Each track then contributes an "#EXTINF:-1,<title>" line followed by a
// "https://<host>/stream" link that carries the track's Url in the "url"
// query parameter.
//
// The stream address is parsed once, before the loop. If host cannot be
// parsed as part of a URL, the address is assembled from its components
// instead, so an unusual host never causes a nil-pointer panic.
func tracksToM3u(host string, tracks []TrackInfo) []byte {
	streamURL, err := url.Parse(fmt.Sprintf("https://%s/stream", host))
	if err != nil {
		streamURL = &url.URL{Scheme: "https", Host: host, Path: "/stream"}
	}

	var b bytes.Buffer
	b.Write([]byte{0xEF, 0xBB, 0xBF}) // UTF-8 byte order mark
	b.WriteString("#EXTM3U\n")

	for _, track := range tracks {
		fmt.Fprintf(&b, "#EXTINF:-1,%s\n", track.Title)

		streamURL.RawQuery = url.Values{"url": {track.Url}}.Encode()
		fmt.Fprintf(&b, "%s\n", streamURL.String())
	}

	return b.Bytes()
}
// removeDuplicate returns the tracks of strSlice with duplicates removed.
// Two tracks are duplicates when their Url fields are equal; the first
// occurrence is kept and the original order is preserved. The result is
// always a non-nil slice, and the input slice is not modified.
func removeDuplicate(strSlice []TrackInfo) []TrackInfo {
	seen := make(map[string]struct{})
	unique := make([]TrackInfo, 0)

	for i := range strSlice {
		track := strSlice[i]
		if _, dup := seen[track.Url]; dup {
			continue
		}
		seen[track.Url] = struct{}{}
		unique = append(unique, track)
	}

	return unique
}
2021-11-02 16:13:23 +05:00
// constructUrl builds the promodj.com music listing URL for genre, with
// params encoded as the query string. An empty genre yields the top-level
// "https://promodj.com/music" listing.
//
// The URL is assembled from its components rather than parsed from a
// formatted string. A genre containing reserved characters (for example "%",
// "?" or "#") is therefore percent-encoded as part of the path, instead of
// causing a panic or spilling into the query or fragment.
func constructUrl(genre string, params url.Values) string {
	u := url.URL{
		Scheme: "https",
		Host:   "promodj.com",
		Path:   "/music",
	}
	if genre != "" {
		u.Path += "/" + genre
	}
	u.RawQuery = params.Encode()

	return u.String()
}