SSWConsulting · zacharykeeping · Jan 14, 2026 · Jan 14, 2026 · Jan 14, 2026
diff --git a/docker/sswlinkauditor b/docker/sswlinkauditor
diff --git a/docker/sswlinkauditor.go b/docker/sswlinkauditor.go
@@ -198,11 +198,24 @@ func isSameOrigin(url1 string, url2 string) bool {
 }
 
 func isSameOriginAndPath(baseUrl string, targetUrl string) bool {
-	// Normalize URLs by ensuring they have trailing slashes for comparison
-	normalizedBase := strings.TrimRight(baseUrl, "/") + "/"
-	normalizedTarget := strings.TrimRight(targetUrl, "/") + "/"
+	// Strip trailing slashes so ".../blog" and ".../blog/" compare as the same URL
+	normalizedBase := strings.TrimRight(baseUrl, "/")
+	normalizedTarget := strings.TrimRight(targetUrl, "/")
 
-	return strings.Index(normalizedTarget, normalizedBase) == 0
+	// A target that does not begin with the base cannot be at or under it
+	if !strings.HasPrefix(normalizedTarget, normalizedBase) {
+		return false
+	}
+
+	// Nothing left after the base: the two URLs are identical once normalized
+	if normalizedTarget == normalizedBase {
+		return true
+	}
+
+	// Whatever follows the base must start with "/", "?" or "#"; a plain string-prefix
+	// match alone would let base "/api" accept the unrelated target "/api-v2"
+	remainder := normalizedTarget[len(normalizedBase):]
+	return strings.HasPrefix(remainder, "/") || strings.HasPrefix(remainder, "?") || strings.HasPrefix(remainder, "#")
 }
 
 func crawl(link Link, ch chan Link, linkch chan LinkStatus, number int) {
@@ -319,13 +332,17 @@ func parseUrl(startUrl string, url string) string {
 		if len(filenameRegex.FindStringSubmatch(UrlPath)) > 0 {
 			fileName := filenameRegex.FindStringSubmatch(UrlPath)[0]
 			UrlPath = strings.ReplaceAll(UrlPath, fileName, "")
+		} else if !strings.HasSuffix(UrlPath, "/") && UrlPath != "" {
+			// If the path doesn't end with / and isn't empty, it's a document not a directory
+			// Get the parent directory for relative link resolution
+			lastSlash := strings.LastIndex(UrlPath, "/")
+			if lastSlash >= 0 {
+				UrlPath = UrlPath[:lastSlash+1]
+			}
 		}
 
 		baseUrl := sUrl.Scheme + "://" + sUrl.Hostname() + UrlPath
-		if !strings.HasSuffix(baseUrl, "/") {
-			baseUrl = baseUrl + "/"
-		}
-
+
 		u, _ := urlP.Parse(baseUrl)
 		u.Path = path.Join(u.Path, url)
 		return u.String()

diff --git a/docker/sswlinkauditor_test.go b/docker/sswlinkauditor_test.go
@@ -0,0 +1,242 @@
+package main
+
+import (
+	urlP "net/url"
+	"testing"
+)
+
+func TestParseUrl(t *testing.T) { // table-driven: parseUrl(startUrl, url) must equal expected exactly
+	tests := []struct {
+		name     string
+		startUrl string
+		url      string
+		expected string
+	}{
+		{
+			name:     "Absolute URL - no change",
+			startUrl: "https://example.com/page",
+			url:      "https://other.com/path",
+			expected: "https://other.com/path",
+		},
+		{
+			name:     "Protocol-relative URL",
+			startUrl: "https://example.com/page",
+			url:      "//cdn.example.com/resource.js",
+			expected: "https://cdn.example.com/resource.js",
+		},
+		{
+			name:     "Root-relative URL",
+			startUrl: "https://example.com/some/page",
+			url:      "/about",
+			expected: "https://example.com/about",
+		},
+		{
+			name:     "Relative URL from page with trailing slash", // NOTE(review): same inputs as the "directory with trailing slash" case below
+			startUrl: "https://example.com/blog/",
+			url:      "post",
+			expected: "https://example.com/blog/post",
+		},
+		{
+			name:     "Relative URL from page without trailing slash",
+			startUrl: "https://example.com/blog",
+			url:      "post",
+			expected: "https://example.com/post", // no trailing slash: "blog" is treated as a document, so "post" resolves against its parent "/"
+		},
+		{
+			name:     "Relative URL from directory with trailing slash",
+			startUrl: "https://example.com/blog/",
+			url:      "post",
+			expected: "https://example.com/blog/post",
+		},
+		{
+			name:     "Relative URL from page with extension",
+			startUrl: "https://example.com/blog/index.html",
+			url:      "about.html",
+			expected: "https://example.com/blog/about.html",
+		},
+		{
+			name:     "URL with fragment - fragment removed",
+			startUrl: "https://example.com/page",
+			url:      "https://example.com/other#section",
+			expected: "https://example.com/other",
+		},
+		{
+			name:     "Deep relative URL",
+			startUrl: "https://example.com/a/b/c/page.html",
+			url:      "d/e/file.html",
+			expected: "https://example.com/a/b/c/d/e/file.html",
+		},
+		{
+			name:     "Relative URL with .aspx extension in startUrl",
+			startUrl: "https://example.com/products/list.aspx",
+			url:      "detail",
+			expected: "https://example.com/products/detail",
+		},
+		{
+			name:     "Root path should not add trailing slash",
+			startUrl: "https://example.com",
+			url:      "page",
+			expected: "https://example.com/page",
+		},
+		{
+			name:     "SSW Rules page - relative link without extension",
+			startUrl: "https://www.ssw.com.au/rules/best-way-to-display-code-on-your-website",
+			url:      "set-language-on-code-blocks",
+			expected: "https://www.ssw.com.au/rules/set-language-on-code-blocks",
+		},
+		{
+			name:     "Root level page - relative link",
+			startUrl: "https://example.com/page",
+			url:      "other",
+			expected: "https://example.com/other",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := parseUrl(tt.startUrl, tt.url)
+			if result != tt.expected {
+				t.Errorf("parseUrl(%q, %q) = %q; want %q", tt.startUrl, tt.url, result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestParseUrl_NoTrailingSlashAdded(t *testing.T) {
+	// Regression check: the base URL must not get a trailing slash appended unconditionally
+	tests := []struct {
+		name     string
+		startUrl string
+		url      string
+		wantPath string
+	}{
+		{
+			name:     "Should not add slash to base without directory",
+			startUrl: "https://example.com/page",
+			url:      "other",
+			wantPath: "/other", // "page" is a document, so "other" replaces it rather than nesting under it
+		},
+		{
+			name:     "Should preserve path structure",
+			startUrl: "https://example.com/api/v1",
+			url:      "users",
+			wantPath: "/api/users", // "v1" is the last segment and gets replaced; must not become /api/v1/users
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := parseUrl(tt.startUrl, tt.url)
+			// Compare only the path component of the result against wantPath
+			if !containsPath(result, tt.wantPath) {
+				t.Errorf("parseUrl(%q, %q) = %q; expected path to contain %q", tt.startUrl, tt.url, result, tt.wantPath)
+			}
+		})
+	}
+}
+
+func TestIsSameOriginAndPath(t *testing.T) { // table-driven: isSameOriginAndPath(baseUrl, targetUrl) must equal expected
+	tests := []struct {
+		name      string
+		baseUrl   string
+		targetUrl string
+		expected  bool
+	}{
+		{
+			name:      "Exact match",
+			baseUrl:   "https://example.com/blog",
+			targetUrl: "https://example.com/blog",
+			expected:  true,
+		},
+		{
+			name:      "Exact match with trailing slash on both",
+			baseUrl:   "https://example.com/blog/",
+			targetUrl: "https://example.com/blog/",
+			expected:  true,
+		},
+		{
+			name:      "Match with one trailing slash",
+			baseUrl:   "https://example.com/blog",
+			targetUrl: "https://example.com/blog/",
+			expected:  true,
+		},
+		{
+			name:      "Child path with separator",
+			baseUrl:   "https://example.com/blog",
+			targetUrl: "https://example.com/blog/post",
+			expected:  true,
+		},
+		{
+			name:      "Deep child path",
+			baseUrl:   "https://example.com/blog",
+			targetUrl: "https://example.com/blog/2024/01/post",
+			expected:  true,
+		},
+		{
+			name:      "Same prefix but different path - should NOT match",
+			baseUrl:   "https://example.com/api",
+			targetUrl: "https://example.com/api-v2",
+			expected:  false,
+		},
+		{
+			name:      "Same prefix with hyphen - should NOT match", // NOTE(review): this pair has no hyphen; the "api"/"api-v2" case above is the hyphenated one
+			baseUrl:   "https://example.com/blog",
+			targetUrl: "https://example.com/blogpost",
+			expected:  false,
+		},
+		{
+			name:      "Different domain - should NOT match",
+			baseUrl:   "https://example.com/blog",
+			targetUrl: "https://other.com/blog",
+			expected:  false,
+		},
+		{
+			name:      "Base is longer - should NOT match",
+			baseUrl:   "https://example.com/blog/post",
+			targetUrl: "https://example.com/blog",
+			expected:  false,
+		},
+		{
+			name:      "Query string continuation",
+			baseUrl:   "https://example.com/search",
+			targetUrl: "https://example.com/search?q=test",
+			expected:  true,
+		},
+		{
+			name:      "Fragment continuation",
+			baseUrl:   "https://example.com/page",
+			targetUrl: "https://example.com/page#section",
+			expected:  true,
+		},
+		{
+			name:      "Root path",
+			baseUrl:   "https://example.com/",
+			targetUrl: "https://example.com/anything",
+			expected:  true,
+		},
+		{
+			name:      "Empty path base",
+			baseUrl:   "https://example.com",
+			targetUrl: "https://example.com/page",
+			expected:  true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := isSameOriginAndPath(tt.baseUrl, tt.targetUrl)
+			if result != tt.expected {
+				t.Errorf("isSameOriginAndPath(%q, %q) = %v; want %v", tt.baseUrl, tt.targetUrl, result, tt.expected)
+			}
+		})
+	}
+}
+
+// containsPath reports whether url parses successfully and its path component equals wantPath exactly
+func containsPath(url, wantPath string) bool {
+	parsed, err := urlP.Parse(url)
+	if err != nil {
+		return false // an unparseable URL never matches
+	}
+	return parsed.Path == wantPath
+}