|
| 1 | +package confluencedatacenter |
| 2 | + |
| 3 | +import ( |
| 4 | + "bytes" |
| 5 | + "context" |
| 6 | + "encoding/base64" |
| 7 | + "errors" |
| 8 | + "fmt" |
| 9 | + "io" |
| 10 | + "net/http" |
| 11 | + "strings" |
| 12 | + |
| 13 | + regexp "github.com/wasilibs/go-re2" |
| 14 | + |
| 15 | + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" |
| 16 | + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" |
| 17 | + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detector_typepb" |
| 18 | +) |
| 19 | + |
| 20 | +type Scanner struct { |
| 21 | + detectors.DefaultMultiPartCredentialProvider |
| 22 | + detectors.EndpointSetter |
| 23 | + client *http.Client |
| 24 | +} |
| 25 | + |
| 26 | +var ( |
| 27 | + _ detectors.Detector = (*Scanner)(nil) |
| 28 | + _ detectors.EndpointCustomizer = (*Scanner)(nil) |
| 29 | +) |
| 30 | + |
| 31 | +func (Scanner) CloudEndpoint() string { return "" } |
| 32 | + |
| 33 | +var ( |
| 34 | + defaultClient = detectors.DetectorHttpClientWithLocalAddresses |
| 35 | + |
| 36 | + keywords = []string{"confluence", "atlassian", "wiki"} |
| 37 | + |
| 38 | + // 44-char base64 PAT; decoded form must match the structural check below. |
| 39 | + tokenPat = regexp.MustCompile(detectors.PrefixRegex(keywords) + `\b([MNO][A-Za-z0-9+/]{43})(?:[^A-Za-z0-9+/=]|\z)`) |
| 40 | + |
| 41 | + // Self-hosted instance URL: scheme + host + optional port. Keyword-scoped |
| 42 | + // so unrelated URLs in the same chunk don't get paired with tokens. |
| 43 | + urlPat = regexp.MustCompile(detectors.PrefixRegex(keywords) + `\b(https?://[a-zA-Z0-9.\-]+(?::\d+)?)\b`) |
| 44 | + |
| 45 | + invalidHosts = simple.NewCache[struct{}]() |
| 46 | + errNoHost = errors.New("no such host") |
| 47 | +) |
| 48 | + |
| 49 | +func (s Scanner) Keywords() []string { |
| 50 | + return keywords |
| 51 | +} |
| 52 | + |
| 53 | +func (s Scanner) Type() detector_typepb.DetectorType { |
| 54 | + return detector_typepb.DetectorType_ConfluenceDataCenter |
| 55 | +} |
| 56 | + |
| 57 | +func (s Scanner) Description() string { |
| 58 | + return "Confluence Data Center is Atlassian's self-hosted wiki product. Personal Access Tokens (PATs) authenticate via Bearer auth against the REST API and grant access scoped to the issuing user." |
| 59 | +} |
| 60 | + |
| 61 | +func (s Scanner) getClient() *http.Client { |
| 62 | + if s.client != nil { |
| 63 | + return s.client |
| 64 | + } |
| 65 | + return defaultClient |
| 66 | +} |
| 67 | + |
// isStructuralPAT reports whether candidate is standard base64 whose decoded
// bytes have the "<numeric id>:<random bytes>" layout used by Confluence DC
// PATs: a non-empty run of ASCII digits, the first colon, and at least one
// byte after it. Anything that fails to decode is rejected.
func isStructuralPAT(candidate string) bool {
	decoded, err := base64.StdEncoding.DecodeString(candidate)
	if err != nil {
		return false
	}

	// Split on the first colon only; later colons belong to the payload.
	id, payload, found := bytes.Cut(decoded, []byte{':'})
	if !found || len(id) == 0 || len(payload) == 0 {
		return false
	}

	for _, c := range id {
		if c < '0' || c > '9' {
			return false
		}
	}
	return true
}
| 87 | + |
| 88 | +func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { |
| 89 | + dataStr := string(data) |
| 90 | + |
| 91 | + uniqueTokens := make(map[string]struct{}) |
| 92 | + for _, m := range tokenPat.FindAllStringSubmatch(dataStr, -1) { |
| 93 | + if _, seen := uniqueTokens[m[1]]; seen { |
| 94 | + continue |
| 95 | + } |
| 96 | + if isStructuralPAT(m[1]) { |
| 97 | + uniqueTokens[m[1]] = struct{}{} |
| 98 | + } |
| 99 | + } |
| 100 | + if len(uniqueTokens) == 0 { |
| 101 | + return nil, nil |
| 102 | + } |
| 103 | + |
| 104 | + foundURLs := make([]string, 0) |
| 105 | + for _, m := range urlPat.FindAllStringSubmatch(dataStr, -1) { |
| 106 | + foundURLs = append(foundURLs, m[1]) |
| 107 | + } |
| 108 | + uniqueURLs := make(map[string]struct{}) |
| 109 | + for _, endpoint := range s.Endpoints(foundURLs...) { |
| 110 | + uniqueURLs[strings.TrimRight(endpoint, "/")] = struct{}{} |
| 111 | + } |
| 112 | + |
| 113 | + // Filter hosts cached as unreachable from prior calls once up front. |
| 114 | + // invalidHosts may also grow during this call (see the verify branch |
| 115 | + // below); those are skipped lazily inside the inner loop. |
| 116 | + liveURLs := make([]string, 0, len(uniqueURLs)) |
| 117 | + for u := range uniqueURLs { |
| 118 | + if !invalidHosts.Exists(u) { |
| 119 | + liveURLs = append(liveURLs, u) |
| 120 | + } |
| 121 | + } |
| 122 | + |
| 123 | + for token := range uniqueTokens { |
| 124 | + emitted := false |
| 125 | + for _, baseURL := range liveURLs { |
| 126 | + if invalidHosts.Exists(baseURL) { |
| 127 | + continue |
| 128 | + } |
| 129 | + |
| 130 | + r := detectors.Result{ |
| 131 | + DetectorType: detector_typepb.DetectorType_ConfluenceDataCenter, |
| 132 | + Raw: []byte(token), |
| 133 | + RawV2: []byte(fmt.Sprintf("%s:%s", token, baseURL)), |
| 134 | + ExtraData: map[string]string{ |
| 135 | + "base_url": baseURL, |
| 136 | + }, |
| 137 | + } |
| 138 | + |
| 139 | + if verify { |
| 140 | + isVerified, vErr := verifyPAT(ctx, s.getClient(), baseURL, token) |
| 141 | + r.Verified = isVerified |
| 142 | + if vErr != nil { |
| 143 | + if errors.Is(vErr, errNoHost) { |
| 144 | + invalidHosts.Set(baseURL, struct{}{}) |
| 145 | + } |
| 146 | + r.SetVerificationError(vErr, token) |
| 147 | + } |
| 148 | + } |
| 149 | + |
| 150 | + results = append(results, r) |
| 151 | + emitted = true |
| 152 | + } |
| 153 | + |
| 154 | + if !emitted { |
| 155 | + // No reachable URL in context — emit an unverified token-only |
| 156 | + // result and annotate why we couldn't verify. |
| 157 | + results = append(results, detectors.Result{ |
| 158 | + DetectorType: detector_typepb.DetectorType_ConfluenceDataCenter, |
| 159 | + Raw: []byte(token), |
| 160 | + RawV2: []byte(token), |
| 161 | + ExtraData: map[string]string{ |
| 162 | + "verification_note": "no reachable Confluence Data Center URL found in context; token reported unverified", |
| 163 | + }, |
| 164 | + }) |
| 165 | + } |
| 166 | + } |
| 167 | + |
| 168 | + return results, nil |
| 169 | +} |
| 170 | + |
| 171 | +func verifyPAT(ctx context.Context, client *http.Client, baseURL, token string) (bool, error) { |
| 172 | + endpoint := strings.TrimRight(baseURL, "/") + "/rest/api/user/current" |
| 173 | + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, http.NoBody) |
| 174 | + if err != nil { |
| 175 | + return false, err |
| 176 | + } |
| 177 | + req.Header.Set("Accept", "application/json") |
| 178 | + req.Header.Set("Authorization", "Bearer "+token) |
| 179 | + |
| 180 | + resp, err := client.Do(req) |
| 181 | + if err != nil { |
| 182 | + if strings.Contains(err.Error(), "no such host") { |
| 183 | + return false, errNoHost |
| 184 | + } |
| 185 | + return false, err |
| 186 | + } |
| 187 | + defer func() { |
| 188 | + _, _ = io.Copy(io.Discard, resp.Body) |
| 189 | + _ = resp.Body.Close() |
| 190 | + }() |
| 191 | + |
| 192 | + switch resp.StatusCode { |
| 193 | + case http.StatusOK: |
| 194 | + return true, nil |
| 195 | + case http.StatusUnauthorized: |
| 196 | + // Auth header outright rejected — unambiguously an invalid credential. |
| 197 | + return false, nil |
| 198 | + default: |
| 199 | + // 403 included: /rest/api/user/current should always be readable by a |
| 200 | + // valid PAT, so a Forbidden here signals something unexpected rather |
| 201 | + // than a definitively invalid token. |
| 202 | + return false, fmt.Errorf("unexpected HTTP response status %d", resp.StatusCode) |
| 203 | + } |
| 204 | +} |
0 commit comments