From f67a1030b308a24cca13ff788f7b7119f0404580 Mon Sep 17 00:00:00 2001
From: wxiaoguang <wxiaoguang@gmail.com>
Date: Wed, 13 Jul 2022 09:07:16 +0800
Subject: [PATCH] Add tests for the host checking logic, clarify the behaviors
 (#20328)

Before, the combination of AllowedDomains/BlockedDomains/AllowLocalNetworks is confusing.

This PR adds tests for the logic, clarify the behaviors.
---
 custom/conf/app.example.ini                   |  1 +
 .../doc/advanced/config-cheat-sheet.en-us.md  |  2 +-
 modules/hostmatcher/hostmatcher.go            |  8 ++--
 services/migrations/migrate.go                | 11 +++--
 services/migrations/migrate_test.go           | 40 +++++++++++++++++++
 5 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini
index fb43ea95a1..0870894894 100644
--- a/custom/conf/app.example.ini
+++ b/custom/conf/app.example.ini
@@ -2232,6 +2232,7 @@ ROUTER = console
 ;BLOCKED_DOMAINS =
 ;;
 ;; Allow private addresses defined by RFC 1918, RFC 1122, RFC 4632 and RFC 4291 (false by default)
+;; If a domain is allowed by ALLOWED_DOMAINS, this option will be ignored.
 ;ALLOW_LOCALNETWORKS = false
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
index 84e3c6ae33..a0e6fb8f13 100644
--- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md
+++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
@@ -1083,7 +1083,7 @@ Task queue configuration has been moved to `queue.task`. However, the below conf
 - `RETRY_BACKOFF`: **3**: Backoff time per http/https request retry (seconds)
 - `ALLOWED_DOMAINS`: **\<empty\>**: Domains allowlist for migrating repositories, default is blank. It means everything will be allowed. Multiple domains could be separated by commas. Wildcard is supported: `github.com, *.github.com`.
 - `BLOCKED_DOMAINS`: **\<empty\>**: Domains blocklist for migrating repositories, default is blank. Multiple domains could be separated by commas. When `ALLOWED_DOMAINS` is not blank, this option has a higher priority to deny domains. Wildcard is supported.
-- `ALLOW_LOCALNETWORKS`: **false**: Allow private addresses defined by RFC 1918, RFC 1122, RFC 4632 and RFC 4291
+- `ALLOW_LOCALNETWORKS`: **false**: Allow private addresses defined by RFC 1918, RFC 1122, RFC 4632 and RFC 4291. If a domain is allowed by `ALLOWED_DOMAINS`, this option will be ignored.
 - `SKIP_TLS_VERIFY`: **false**: Allow skip tls verify
 
 ## Federation (`federation`)
diff --git a/modules/hostmatcher/hostmatcher.go b/modules/hostmatcher/hostmatcher.go
index 00bbc6cb0a..81c4202fcd 100644
--- a/modules/hostmatcher/hostmatcher.go
+++ b/modules/hostmatcher/hostmatcher.go
@@ -125,14 +125,14 @@ func (hl *HostMatchList) checkIP(ip net.IP) bool {
 
 // MatchHostName checks if the host matches an allow/deny(block) list
 func (hl *HostMatchList) MatchHostName(host string) bool {
+	if hl == nil {
+		return false
+	}
+
 	hostname, _, err := net.SplitHostPort(host)
 	if err != nil {
 		hostname = host
 	}
-
-	if hl == nil {
-		return false
-	}
 	if hl.checkPattern(hostname) {
 		return true
 	}
diff --git a/services/migrations/migrate.go b/services/migrations/migrate.go
index ce76733bd5..f2542173a0 100644
--- a/services/migrations/migrate.go
+++ b/services/migrations/migrate.go
@@ -84,7 +84,10 @@ func IsMigrateURLAllowed(remoteURL string, doer *user_model.User) error {
 
 	// some users only use proxy, there is no DNS resolver. it's safe to ignore the LookupIP error
 	addrList, _ := net.LookupIP(hostName)
+	return checkByAllowBlockList(hostName, addrList)
+}
 
+func checkByAllowBlockList(hostName string, addrList []net.IP) error {
 	var ipAllowed bool
 	var ipBlocked bool
 	for _, addr := range addrList {
@@ -93,12 +96,12 @@ func IsMigrateURLAllowed(remoteURL string, doer *user_model.User) error {
 	}
 	var blockedError error
 	if blockList.MatchHostName(hostName) || ipBlocked {
-		blockedError = &models.ErrInvalidCloneAddr{Host: u.Host, IsPermissionDenied: true}
+		blockedError = &models.ErrInvalidCloneAddr{Host: hostName, IsPermissionDenied: true}
 	}
-	// if we have an allow-list, check the allow-list first
+	// if we have an allow-list, check the allow-list before return to get the more accurate error
 	if !allowList.IsEmpty() {
 		if !allowList.MatchHostName(hostName) && !ipAllowed {
-			return &models.ErrInvalidCloneAddr{Host: u.Host, IsPermissionDenied: true}
+			return &models.ErrInvalidCloneAddr{Host: hostName, IsPermissionDenied: true}
 		}
 	}
 	// otherwise, we always follow the blocked list
@@ -474,5 +477,7 @@ func Init() error {
 		allowList.AppendBuiltin(hostmatcher.MatchBuiltinPrivate)
 		allowList.AppendBuiltin(hostmatcher.MatchBuiltinLoopback)
 	}
+	// TODO: at the moment, if ALLOW_LOCALNETWORKS=false, ALLOWED_DOMAINS=domain.com, and domain.com has IP 127.0.0.1, then it's still allowed.
+	// if we want to block such case, the private&loopback should be added to the blockList when ALLOW_LOCALNETWORKS=false
 	return nil
 }
diff --git a/services/migrations/migrate_test.go b/services/migrations/migrate_test.go
index d09c184d91..53cfe6d3eb 100644
--- a/services/migrations/migrate_test.go
+++ b/services/migrations/migrate_test.go
@@ -5,6 +5,7 @@
 package migrations
 
 import (
+	"net"
 	"path/filepath"
 	"testing"
 
@@ -74,3 +75,42 @@ func TestMigrateWhiteBlocklist(t *testing.T) {
 
 	setting.ImportLocalPaths = old
 }
+
+func TestAllowBlockList(t *testing.T) {
+	init := func(allow, block string, local bool) {
+		setting.Migrations.AllowedDomains = allow
+		setting.Migrations.BlockedDomains = block
+		setting.Migrations.AllowLocalNetworks = local
+		assert.NoError(t, Init())
+	}
+
+	// default, allow all external, block none, no local networks
+	init("", "", false)
+	assert.NoError(t, checkByAllowBlockList("domain.com", []net.IP{net.ParseIP("1.2.3.4")}))
+	assert.Error(t, checkByAllowBlockList("domain.com", []net.IP{net.ParseIP("127.0.0.1")}))
+
+	// allow all including local networks (it could lead to SSRF in production)
+	init("", "", true)
+	assert.NoError(t, checkByAllowBlockList("domain.com", []net.IP{net.ParseIP("1.2.3.4")}))
+	assert.NoError(t, checkByAllowBlockList("domain.com", []net.IP{net.ParseIP("127.0.0.1")}))
+
+	// allow wildcard, block some subdomains. if the domain name is allowed, then the local network check is skipped
+	init("*.domain.com", "blocked.domain.com", false)
+	assert.NoError(t, checkByAllowBlockList("sub.domain.com", []net.IP{net.ParseIP("1.2.3.4")}))
+	assert.NoError(t, checkByAllowBlockList("sub.domain.com", []net.IP{net.ParseIP("127.0.0.1")}))
+	assert.Error(t, checkByAllowBlockList("blocked.domain.com", []net.IP{net.ParseIP("1.2.3.4")}))
+	assert.Error(t, checkByAllowBlockList("sub.other.com", []net.IP{net.ParseIP("1.2.3.4")}))
+
+	// allow wildcard (it could lead to SSRF in production)
+	init("*", "", false)
+	assert.NoError(t, checkByAllowBlockList("domain.com", []net.IP{net.ParseIP("1.2.3.4")}))
+	assert.NoError(t, checkByAllowBlockList("domain.com", []net.IP{net.ParseIP("127.0.0.1")}))
+
+	// local network can still be blocked
+	init("*", "127.0.0.*", false)
+	assert.NoError(t, checkByAllowBlockList("domain.com", []net.IP{net.ParseIP("1.2.3.4")}))
+	assert.Error(t, checkByAllowBlockList("domain.com", []net.IP{net.ParseIP("127.0.0.1")}))
+
+	// reset
+	init("", "", false)
+}