Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tune pool while refreshing sources #147

Merged
merged 1 commit into from
Jul 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 13 additions & 22 deletions refresher/refresher.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ type Refresher struct {

// probeContract lists the probe operations that Refresher.refresh invokes
// through ref.probe while draining a source feed.
type probeContract interface {
// Schedule is called for every feed signal that has Add set, together with
// the ID of the source that produced it. A false result is only logged by
// refresh ("failed to schedule"); the loop keeps going.
Schedule(ctx context.Context, proxy pmux.Proxy, source int) bool
// Forget is called for every feed signal that does not have Add set and
// receives the error carried by that signal. A false result is only logged
// by refresh ("failed to forget").
Forget(ctx context.Context, proxy pmux.Proxy, err error) bool
}

type poolContract interface {
Expand Down Expand Up @@ -309,31 +310,21 @@ func (ref *Refresher) refresh(ctx context.Context, client *http.Client, source s
ref.stats.Launch(source.ID)
feed := source.Feed(ctx, client)
ref.progress <- progress{source.ID, 0}
for proxy := range feed.Generate(ctx) {
ctx := app.Log.WithStringer(ctx, "proxy", proxy)
if !ref.probe.Schedule(ctx, proxy, source.ID) {
for signal := range feed.Generate(ctx) {
ctx := app.Log.WithStringer(ctx, "proxy", signal.Proxy)
log := app.Log.From(ctx)
if !signal.Add {
log.Info().Err(signal.Err).Msg("forgetting proxy")
// let's see if it's not too aggressive
if !ref.probe.Forget(ctx, signal.Proxy, signal.Err) {
log.Warn().Msg("failed to forget")
}
continue
}
if !ref.probe.Schedule(ctx, signal.Proxy, source.ID) {
log.Warn().Msg("failed to schedule") // TODO: this happens too often
}
ref.progress <- progress{source.ID, feed.Len()}

// if proxy.Proto == pmux.HTTP {
// if !ref.probe.Schedule(ctx, pmux.Proxy{
// IP: proxy.IP,
// Port: proxy.Port,
// Proto: pmux.HTTPS,
// }, source.ID) {
// log.Warn().Msg("failed to schedule")
// }
// }
// if proxy.Proto == pmux.HTTPS {
// if !ref.probe.Schedule(ctx, pmux.Proxy{
// IP: proxy.IP,
// Port: proxy.Port,
// Proto: pmux.HTTP,
// }, source.ID) {
// log.Warn().Msg("failed to schedule")
// }
// }
}
// TODO: maybe update failed state from a second goroutine?...
ref.stats.Finish(source.ID, feed.Err())
Expand Down
21 changes: 14 additions & 7 deletions refresher/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ func (c counterProbe) Schedule(ctx context.Context, proxy pmux.Proxy, source int
return true
}

// Forget is the test stub for the Forget method of the probe contract: it
// ignores all of its arguments and always reports success.
func (c counterProbe) Forget(ctx context.Context, proxy pmux.Proxy, err error) bool {
return true
}

// mockStats is a map-backed stand-in for the stats collector used in tests.
// Keys are ints — presumably source IDs, given Launch(source int); confirm.
type mockStats map[int]*stats.Stat

func (m mockStats) Launch(source int) {
Expand Down Expand Up @@ -102,15 +106,18 @@ var stubSource = []sources.Source{

// proxyArraySrc is a test feed backed by a fixed slice of proxies. Its
// Generate method sends one item per element, in slice order, and stops early
// when the context is done.
type proxyArraySrc []pmux.Proxy

func (t proxyArraySrc) Generate(ctx context.Context) <-chan pmux.Proxy {
out := make(chan pmux.Proxy)
func (t proxyArraySrc) Generate(ctx context.Context) <-chan sources.Signal {
out := make(chan sources.Signal)
go func() {
defer close(out)
for _, v := range t {
select {
case <-ctx.Done():
return
case out <- v:
case out <- sources.Signal{
Proxy: v,
Add: true,
}:
}
}
}()
Expand All @@ -127,8 +134,8 @@ func (t proxyArraySrc) Len() int {

// sleepingSrc is an int-backed test feed. Its Generate method returns a
// channel that is closed when the feeding goroutine exits.
// NOTE(review): the loop body is not visible here — presumably it sleeps
// until the context is cancelled; confirm against the full file.
type sleepingSrc int

func (t sleepingSrc) Generate(ctx context.Context) <-chan pmux.Proxy {
out := make(chan pmux.Proxy)
func (t sleepingSrc) Generate(ctx context.Context) <-chan sources.Signal {
out := make(chan sources.Signal)
go func() {
defer close(out)
for {
Expand All @@ -153,8 +160,8 @@ func (t sleepingSrc) Len() int {

// failingSrc is a string-backed test feed whose Generate method returns an
// already-closed channel, so consumers see no items at all.
type failingSrc string

func (f failingSrc) Generate(ctx context.Context) <-chan pmux.Proxy {
out := make(chan pmux.Proxy)
func (f failingSrc) Generate(ctx context.Context) <-chan sources.Signal {
out := make(chan sources.Signal)
close(out)
return out
}
Expand Down
13 changes: 13 additions & 0 deletions sources/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"fmt"

"github.com/nfx/slrp/pmux"
"github.com/rs/zerolog"
)

Expand Down Expand Up @@ -35,6 +36,18 @@ type sourceError struct {
skip bool
}

// Proxy returns the proxy built by pmux.NewProxyFromURL from the value of the
// first strEC entry found in se.fields. Entries of any other type are skipped.
// When no strEC entry exists, the zero Proxy is returned.
func (se sourceError) Proxy() pmux.Proxy {
	for _, field := range se.fields {
		if str, ok := field.(strEC); ok {
			return pmux.NewProxyFromURL(str.value)
		}
	}
	return 0
}

func (se sourceError) Error() string {
ctx := se.msg
for _, v := range se.fields {
Expand Down
6 changes: 6 additions & 0 deletions sources/freeproxycz.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ func freeProxyCz(ctx context.Context, h *http.Client) Src {
return
}
err = p.Each3("IP address", "Port", "Protocol", func(a, b, c string) error {
if strings.Contains(a, "window.adsbygoogle") {
return nil
}
if strings.Contains(a, "Please try again later") {
return nil
}
enc := strings.Split(a, `"`)
if len(enc) != 3 {
return fmt.Errorf("mangled address: %s", a)
Expand Down
77 changes: 36 additions & 41 deletions sources/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,43 @@ func init() {
"socks4": "/master/socks4.txt",
"socks5": "/master/socks5.txt",
}, 3*time.Hour),
github(17, "jetkai/proxy-list", map[string]string{
"http": "/main/online-proxies/txt/proxies-http.txt",
"https": "/main/online-proxies/txt/proxies-https.txt",
"socks4": "/main/online-proxies/txt/proxies-socks4.txt",
"socks5": "/main/online-proxies/txt/proxies-socks5.txt",
}, 2*time.Hour),
github(25, "almroot/proxylist", map[string]string{
"http": "/master/list.txt",
}, 1*time.Hour),
// github(17, "jetkai/proxy-list", map[string]string{
// "http": "/main/online-proxies/txt/proxies-http.txt",
// "https": "/main/online-proxies/txt/proxies-https.txt",
// "socks4": "/main/online-proxies/txt/proxies-socks4.txt",
// "socks5": "/main/online-proxies/txt/proxies-socks5.txt",
// }, 2*time.Hour),
// github(25, "almroot/proxylist", map[string]string{
// "http": "/master/list.txt",
// }, 1*time.Hour),
github(26, "andigwandi/free-proxy", map[string]string{
"http": "/main/proxy_list.txt",
}, 2*time.Hour),
github(27, "aslisk/proxyhttps", map[string]string{
"http": "/main/https.txt",
}, 24*time.Hour),
github(28, "B4RC0DE-TM/proxy-list", map[string]string{
"http": "/main/HTTP.txt",
"socks4": "/main/SOCKS4.txt",
"socks5": "/main/SOCKS5.txt",
}, 24*time.Hour),
github(29, "BlackSnowDot/proxylist-update-every-minute", map[string]string{
"http": "/main/https.txt",
"socks4": "/main/socks.txt",
}, 15*time.Minute),
// github(28, "B4RC0DE-TM/proxy-list", map[string]string{
// "http": "/main/HTTP.txt",
// "socks4": "/main/SOCKS4.txt",
// "socks5": "/main/SOCKS5.txt",
// }, 24*time.Hour),
// github(29, "BlackSnowDot/proxylist-update-every-minute", map[string]string{
// "http": "/main/https.txt",
// "socks4": "/main/socks.txt",
// }, 15*time.Minute),
github(30, "fahimscirex/proxybd", map[string]string{
"http": "/master/proxylist/http.txt",
"socks4": "/master/proxylist/socks4.txt",
"socks5": "/master/proxylist/socks5.txt",
}, 4*time.Hour),
github(31, "hanwayTech/free-proxy-list", map[string]string{
"http": "/main/https.txt",
"socks4": "/main/socks4.txt",
"socks5": "/main/socks5.txt",
}, 1*time.Hour),
github(32, "hendrikbgr/Free-Proxy-Repo", map[string]string{
"http": "/master/proxy_list.txt",
}, 24*time.Hour),
// github(31, "hanwayTech/free-proxy-list", map[string]string{
// "http": "/main/https.txt",
// "socks4": "/main/socks4.txt",
// "socks5": "/main/socks5.txt",
// }, 1*time.Hour),
// github(32, "hendrikbgr/Free-Proxy-Repo", map[string]string{
// "http": "/master/proxy_list.txt",
// }, 24*time.Hour),
github(33, "hookzof/socks5_list", map[string]string{
"socks5": "/master/proxy.txt",
}, 5*time.Minute),
Expand All @@ -66,11 +66,11 @@ func init() {
github(36, "mertguvencli/http-proxy-list", map[string]string{
"http": "/main/proxy-list/data.txt",
}, 10*time.Minute),
github(37, "miyukii-chan/proxy-list", map[string]string{
"http": "/master/proxies/http.txt",
"socks4": "/master/proxies/socks4.txt",
"socks5": "/master/proxies/socks5.txt",
}, 24*time.Hour),
// github(37, "miyukii-chan/proxy-list", map[string]string{
// "http": "/master/proxies/http.txt",
// "socks4": "/master/proxies/socks4.txt",
// "socks5": "/master/proxies/socks5.txt",
// }, 24*time.Hour),
github(38, "mmpx12/proxy-list", map[string]string{
"http": "/master/https.txt",
"socks4": "/master/socks4.txt",
Expand Down Expand Up @@ -100,11 +100,11 @@ func init() {
// "socks4": "/main/socks4.txt",
// "socks5": "/main/socks5.txt",
// }, 3*time.Hour),
github(44, "rdavydov/proxy-list", map[string]string{
"http": "/main/proxies/http.txt",
"socks4": "/main/proxies/socks4.txt",
"socks5": "/main/proxies/socks5.txt",
}, 30*time.Minute),
// github(44, "rdavydov/proxy-list", map[string]string{
// "http": "/main/proxies/http.txt",
// "socks4": "/main/proxies/socks4.txt",
// "socks5": "/main/proxies/socks5.txt",
// }, 30*time.Minute),
github(45, "ReCaree/proxy-scrapper", map[string]string{
"http": "/master/proxy/http-removed.txt",
"socks4": "/master/proxy/socks4-removed.txt",
Expand All @@ -115,11 +115,6 @@ func init() {
"socks4": "/main/SOCKS4_RAW.txt",
"socks5": "/main/SOCKS5_RAW.txt",
}, 30*time.Minute),
github(49, "saschazesiger/Free-Proxies", map[string]string{
"http": "/master/proxies/http.txt",
"socks4": "/master/proxies/socks4.txt",
"socks5": "/master/proxies/socks5.txt",
}, 1*time.Hour),
github(50, "ShiftyTR/Proxy-List", map[string]string{
"http": "/master/https.txt",
"socks4": "/master/socks4.txt",
Expand Down
2 changes: 1 addition & 1 deletion sources/premproxy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func TestPremproxy(t *testing.T) {
src := premproxy(ctx, &http.Client{})
seen := map[string]int{}
for x := range src.Generate(ctx) {
y := x.String()
y := x.Proxy.String()
seen[y] = seen[y] + 1
}
log.Printf("found: %d", len(seen))
Expand Down
2 changes: 1 addition & 1 deletion sources/sources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (

func consumeSource(ctx context.Context, feed Src) (found []pmux.Proxy) {
for proxy := range feed.Generate(ctx) {
found = append(found, proxy)
found = append(found, proxy.Proxy)
}
return
}
Expand Down
Loading