Skip to content

Commit 6572584

Browse files
authored
Add Confluence Data Center PAT detector (#4886)
* add Confluence Data Center detector
1 parent 4d2a8ef commit 6572584

6 files changed

Lines changed: 384 additions & 7 deletions

File tree

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
package confluencedatacenter
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"encoding/base64"
7+
"errors"
8+
"fmt"
9+
"io"
10+
"net/http"
11+
"strings"
12+
13+
regexp "github.com/wasilibs/go-re2"
14+
15+
"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple"
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
17+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detector_typepb"
18+
)
19+
20+
type Scanner struct {
21+
detectors.DefaultMultiPartCredentialProvider
22+
detectors.EndpointSetter
23+
client *http.Client
24+
}
25+
26+
var (
27+
_ detectors.Detector = (*Scanner)(nil)
28+
_ detectors.EndpointCustomizer = (*Scanner)(nil)
29+
)
30+
31+
// CloudEndpoint returns the empty string: this detector has no cloud
// endpoint, since the product is self-hosted.
func (Scanner) CloudEndpoint() string {
	return ""
}
32+
33+
var (
34+
defaultClient = detectors.DetectorHttpClientWithLocalAddresses
35+
36+
keywords = []string{"confluence", "atlassian", "wiki"}
37+
38+
// 44-char base64 PAT; decoded form must match the structural check below.
39+
tokenPat = regexp.MustCompile(detectors.PrefixRegex(keywords) + `\b([MNO][A-Za-z0-9+/]{43})(?:[^A-Za-z0-9+/=]|\z)`)
40+
41+
// Self-hosted instance URL: scheme + host + optional port. Keyword-scoped
42+
// so unrelated URLs in the same chunk don't get paired with tokens.
43+
urlPat = regexp.MustCompile(detectors.PrefixRegex(keywords) + `\b(https?://[a-zA-Z0-9.\-]+(?::\d+)?)\b`)
44+
45+
invalidHosts = simple.NewCache[struct{}]()
46+
errNoHost = errors.New("no such host")
47+
)
48+
49+
func (s Scanner) Keywords() []string {
50+
return keywords
51+
}
52+
53+
func (s Scanner) Type() detector_typepb.DetectorType {
54+
return detector_typepb.DetectorType_ConfluenceDataCenter
55+
}
56+
57+
func (s Scanner) Description() string {
58+
return "Confluence Data Center is Atlassian's self-hosted wiki product. Personal Access Tokens (PATs) authenticate via Bearer auth against the REST API and grant access scoped to the issuing user."
59+
}
60+
61+
func (s Scanner) getClient() *http.Client {
62+
if s.client != nil {
63+
return s.client
64+
}
65+
return defaultClient
66+
}
67+
68+
// isStructuralPAT reports whether candidate is standard base64 whose decoded
// bytes have the "<numeric id>:<random bytes>" layout used by Confluence DC
// PATs: a non-empty run of ASCII digits, then a colon, then at least one more
// byte. Only the part before the first colon is inspected for digits.
func isStructuralPAT(candidate string) bool {
	decoded, err := base64.StdEncoding.DecodeString(candidate)
	if err != nil {
		return false
	}

	// Split at the first colon; both sides must be non-empty.
	id, rest, found := bytes.Cut(decoded, []byte{':'})
	if !found || len(id) == 0 || len(rest) == 0 {
		return false
	}

	for _, c := range id {
		if !(c >= '0' && c <= '9') {
			return false
		}
	}
	return true
}
87+
88+
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
89+
dataStr := string(data)
90+
91+
uniqueTokens := make(map[string]struct{})
92+
for _, m := range tokenPat.FindAllStringSubmatch(dataStr, -1) {
93+
if _, seen := uniqueTokens[m[1]]; seen {
94+
continue
95+
}
96+
if isStructuralPAT(m[1]) {
97+
uniqueTokens[m[1]] = struct{}{}
98+
}
99+
}
100+
if len(uniqueTokens) == 0 {
101+
return nil, nil
102+
}
103+
104+
foundURLs := make([]string, 0)
105+
for _, m := range urlPat.FindAllStringSubmatch(dataStr, -1) {
106+
foundURLs = append(foundURLs, m[1])
107+
}
108+
uniqueURLs := make(map[string]struct{})
109+
for _, endpoint := range s.Endpoints(foundURLs...) {
110+
uniqueURLs[strings.TrimRight(endpoint, "/")] = struct{}{}
111+
}
112+
113+
// Filter hosts cached as unreachable from prior calls once up front.
114+
// invalidHosts may also grow during this call (see the verify branch
115+
// below); those are skipped lazily inside the inner loop.
116+
liveURLs := make([]string, 0, len(uniqueURLs))
117+
for u := range uniqueURLs {
118+
if !invalidHosts.Exists(u) {
119+
liveURLs = append(liveURLs, u)
120+
}
121+
}
122+
123+
for token := range uniqueTokens {
124+
emitted := false
125+
for _, baseURL := range liveURLs {
126+
if invalidHosts.Exists(baseURL) {
127+
continue
128+
}
129+
130+
r := detectors.Result{
131+
DetectorType: detector_typepb.DetectorType_ConfluenceDataCenter,
132+
Raw: []byte(token),
133+
RawV2: []byte(fmt.Sprintf("%s:%s", token, baseURL)),
134+
ExtraData: map[string]string{
135+
"base_url": baseURL,
136+
},
137+
}
138+
139+
if verify {
140+
isVerified, vErr := verifyPAT(ctx, s.getClient(), baseURL, token)
141+
r.Verified = isVerified
142+
if vErr != nil {
143+
if errors.Is(vErr, errNoHost) {
144+
invalidHosts.Set(baseURL, struct{}{})
145+
}
146+
r.SetVerificationError(vErr, token)
147+
}
148+
}
149+
150+
results = append(results, r)
151+
emitted = true
152+
}
153+
154+
if !emitted {
155+
// No reachable URL in context — emit an unverified token-only
156+
// result and annotate why we couldn't verify.
157+
results = append(results, detectors.Result{
158+
DetectorType: detector_typepb.DetectorType_ConfluenceDataCenter,
159+
Raw: []byte(token),
160+
RawV2: []byte(token),
161+
ExtraData: map[string]string{
162+
"verification_note": "no reachable Confluence Data Center URL found in context; token reported unverified",
163+
},
164+
})
165+
}
166+
}
167+
168+
return results, nil
169+
}
170+
171+
func verifyPAT(ctx context.Context, client *http.Client, baseURL, token string) (bool, error) {
172+
endpoint := strings.TrimRight(baseURL, "/") + "/rest/api/user/current"
173+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, http.NoBody)
174+
if err != nil {
175+
return false, err
176+
}
177+
req.Header.Set("Accept", "application/json")
178+
req.Header.Set("Authorization", "Bearer "+token)
179+
180+
resp, err := client.Do(req)
181+
if err != nil {
182+
if strings.Contains(err.Error(), "no such host") {
183+
return false, errNoHost
184+
}
185+
return false, err
186+
}
187+
defer func() {
188+
_, _ = io.Copy(io.Discard, resp.Body)
189+
_ = resp.Body.Close()
190+
}()
191+
192+
switch resp.StatusCode {
193+
case http.StatusOK:
194+
return true, nil
195+
case http.StatusUnauthorized:
196+
// Auth header outright rejected — unambiguously an invalid credential.
197+
return false, nil
198+
default:
199+
// 403 included: /rest/api/user/current should always be readable by a
200+
// valid PAT, so a Forbidden here signals something unexpected rather
201+
// than a definitively invalid token.
202+
return false, fmt.Errorf("unexpected HTTP response status %d", resp.StatusCode)
203+
}
204+
}
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
package confluencedatacenter
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/http"
7+
"testing"
8+
9+
"github.com/google/go-cmp/cmp"
10+
"github.com/stretchr/testify/assert"
11+
"github.com/stretchr/testify/require"
12+
"gopkg.in/h2non/gock.v1"
13+
14+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
15+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
17+
)
18+
19+
// Sample tokens in the real PAT shape: each one base64-decodes to
// "<numeric id>:<random bytes>".
const validPAT1 = "NTk3MjQzOTIyNTAwOtFOuTsHRIp1E81GApKpC2xpEzfz"
const validPAT2 = "NDc4MjM3OTUxMzk2OopoSkTDTnBcWIw0Wa4bico9zOLK"

// nonStructural is a 44-character base64 string that satisfies tokenPat
// (it starts with one of [MNO]) but decodes to bytes containing no colon.
// It therefore has to be rejected by isStructuralPAT rather than by the
// regex, which exercises the reject path of the structural post-filter.
const nonStructural = "MAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
28+
29+
// TestConfluenceDataCenter_Pattern is a table-driven test of the detector's
// matching logic with verification disabled. For each case it checks that the
// Aho-Corasick core matches the input whenever results are expected, runs
// FromData with verify=false, and compares the set of RawV2 values (falling
// back to Raw when RawV2 is empty) against the expected set.
func TestConfluenceDataCenter_Pattern(t *testing.T) {
30+
d := Scanner{}
31+
d.UseFoundEndpoints(true)
32+
ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})
33+
34+
// Each case gives a raw input chunk and the RawV2 strings expected back;
// order is irrelevant because results are compared as sets below.
tests := []struct {
35+
name string
36+
input string
37+
want []string
38+
}{
39+
{
40+
name: "token + instance URL in one chunk",
41+
input: fmt.Sprintf(`
42+
CONFLUENCE_URL=https://wiki.example.com:8443
43+
confluence_token=%s
44+
`, validPAT1),
45+
want: []string{validPAT1 + ":https://wiki.example.com:8443"},
46+
},
47+
{
48+
name: "token + REST API URL pattern",
49+
input: fmt.Sprintf(`
50+
# confluence bearer token: %s
51+
confluence host: https://confluence.corp.local/rest/api/user/current
52+
`, validPAT1),
53+
want: []string{validPAT1 + ":https://confluence.corp.local"},
54+
},
55+
{
56+
name: "token with no URL in context (token-only)",
57+
input: fmt.Sprintf(`
58+
# confluence personal access token for CI
59+
TOKEN=%s
60+
`, validPAT2),
61+
want: []string{validPAT2},
62+
},
63+
{
64+
name: "structural post-filter rejects non-PAT base64",
65+
input: fmt.Sprintf(`
66+
confluence key: %s
67+
`, nonStructural),
68+
want: []string{},
69+
},
70+
{
71+
name: "multiple tokens + multiple URLs => Cartesian product",
72+
input: fmt.Sprintf(`
73+
confluence prod: https://wiki.prod.corp/wiki/
74+
confluence stg: https://wiki.stg.corp/rest/api
75+
confluence_a=%s
76+
confluence_b=%s
77+
`, validPAT1, validPAT2),
78+
want: []string{
79+
validPAT1 + ":https://wiki.prod.corp",
80+
validPAT1 + ":https://wiki.stg.corp",
81+
validPAT2 + ":https://wiki.prod.corp",
82+
validPAT2 + ":https://wiki.stg.corp",
83+
},
84+
},
85+
}
86+
87+
for _, test := range tests {
88+
t.Run(test.name, func(t *testing.T) {
89+
// A case that expects results must also be matched by the keyword
// pre-filter; otherwise the test fails here.
matched := ahoCorasickCore.FindDetectorMatches([]byte(test.input))
90+
if len(test.want) > 0 && len(matched) == 0 {
91+
t.Fatalf("keywords %v not matched by aho-corasick in input", d.Keywords())
92+
}
93+
94+
results, err := d.FromData(context.Background(), false, []byte(test.input))
95+
require.NoError(t, err)
96+
97+
if len(results) != len(test.want) {
98+
t.Fatalf("mismatch in result count: expected %d, got %d (%+v)", len(test.want), len(results), results)
99+
}
100+
101+
// Compare as sets keyed by RawV2 (or Raw when RawV2 is empty).
actual := make(map[string]struct{}, len(results))
102+
for _, r := range results {
103+
if len(r.RawV2) > 0 {
104+
actual[string(r.RawV2)] = struct{}{}
105+
} else {
106+
actual[string(r.Raw)] = struct{}{}
107+
}
108+
}
109+
expected := make(map[string]struct{}, len(test.want))
110+
for _, v := range test.want {
111+
expected[v] = struct{}{}
112+
}
113+
if diff := cmp.Diff(expected, actual); diff != "" {
114+
t.Errorf("%s diff: (-want +got)\n%s", test.name, diff)
115+
}
116+
})
117+
}
118+
}
119+
120+
func TestConfluenceDataCenter_Verification(t *testing.T) {
121+
const baseURL = "https://wiki.internal.corp"
122+
123+
cases := []struct {
124+
name string
125+
status int
126+
wantVerified bool
127+
wantVerifyErr bool
128+
}{
129+
{"200 verified", http.StatusOK, true, false},
130+
{"401 invalid", http.StatusUnauthorized, false, false},
131+
{"403 unexpected", http.StatusForbidden, false, true},
132+
{"500 unknown", http.StatusInternalServerError, false, true},
133+
}
134+
135+
for _, tc := range cases {
136+
t.Run(tc.name, func(t *testing.T) {
137+
client := common.SaneHttpClient()
138+
d := Scanner{client: client}
139+
d.UseFoundEndpoints(true)
140+
141+
defer gock.Off()
142+
defer gock.RestoreClient(client)
143+
gock.InterceptClient(client)
144+
145+
gock.New(baseURL).
146+
Get("/rest/api/user/current").
147+
MatchHeader("Authorization", "Bearer "+validPAT1).
148+
Reply(tc.status)
149+
150+
input := fmt.Sprintf("confluence url=%s\nconfluence token=%s\n", baseURL, validPAT1)
151+
152+
results, err := d.FromData(context.Background(), true, []byte(input))
153+
require.NoError(t, err)
154+
require.Len(t, results, 1)
155+
156+
r := results[0]
157+
assert.Equal(t, tc.wantVerified, r.Verified)
158+
if tc.wantVerifyErr {
159+
assert.Error(t, r.VerificationError())
160+
} else {
161+
assert.NoError(t, r.VerificationError())
162+
}
163+
})
164+
}
165+
}

pkg/engine/defaults/defaults.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ import (
185185
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/commercejs"
186186
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/commodities"
187187
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/companyhub"
188+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/confluencedatacenter"
188189
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/confluent"
189190
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/contentfulpersonalaccesstoken"
190191
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/conversiontools"
@@ -1056,6 +1057,7 @@ func buildDetectorList() []detectors.Detector {
10561057
&commercejs.Scanner{},
10571058
&commodities.Scanner{},
10581059
&companyhub.Scanner{},
1060+
&confluencedatacenter.Scanner{},
10591061
&confluent.Scanner{},
10601062
&contentfulpersonalaccesstoken.Scanner{},
10611063
&conversiontools.Scanner{},

pkg/engine/engine_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1380,6 +1380,7 @@ func TestEngineInitializesCloudProviderDetectors(t *testing.T) {
13801380
detector_typepb.DetectorType_TableauPersonalAccessToken: {},
13811381
detector_typepb.DetectorType_HashiCorpVaultAuth: {},
13821382
detector_typepb.DetectorType_JiraDataCenterPAT: {},
1383+
detector_typepb.DetectorType_ConfluenceDataCenter: {},
13831384
// these do not have any cloud endpoint
13841385
}
13851386

0 commit comments

Comments
 (0)