Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

- The SDK now automatically propagates the trace-context to the native layer. This allows to connect errors on different layers of the application. ([#4137](https://github.com/getsentry/sentry-java/pull/4137))

### Behavioural Changes

- Use `java.net.URI` for parsing URLs in `UrlUtils` ([#4210](https://github.com/getsentry/sentry-java/pull/4210))
- This could affect grouping for issues with messages containing URLs that fall in known corner cases that were handled incorrectly previously (e.g. email in URL path)

### Dependencies

- Bump Native SDK from v0.7.20 to v0.8.1 ([#4137](https://github.com/getsentry/sentry-java/pull/4137))
Expand Down
135 changes: 32 additions & 103 deletions sentry/src/main/java/io/sentry/util/UrlUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@
import io.sentry.ISpan;
import io.sentry.SpanDataConvention;
import io.sentry.protocol.Request;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.net.URI;
import org.jetbrains.annotations.ApiStatus;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
Expand All @@ -15,123 +12,55 @@
public final class UrlUtils {

public static final @NotNull String SENSITIVE_DATA_SUBSTITUTE = "[Filtered]";
private static final @NotNull Pattern AUTH_REGEX = Pattern.compile("(.+://)(.*@)(.*)");

public static @Nullable UrlDetails parseNullable(final @Nullable String url) {
if (url == null) {
return null;
}

return parse(url);
return url == null ? null : parse(url);
}

public static @NotNull UrlDetails parse(final @NotNull String url) {
if (isAbsoluteUrl(url)) {
return splitAbsoluteUrl(url);
} else {
return splitRelativeUrl(url);
}
}

private static boolean isAbsoluteUrl(@NotNull String url) {
return url.contains("://");
}

private static @NotNull UrlDetails splitRelativeUrl(final @NotNull String url) {
final int queryParamSeparatorIndex = url.indexOf("?");
final int fragmentSeparatorIndex = url.indexOf("#");

final @Nullable String baseUrl =
extractBaseUrl(url, queryParamSeparatorIndex, fragmentSeparatorIndex);
final @Nullable String query =
extractQuery(url, queryParamSeparatorIndex, fragmentSeparatorIndex);
final @Nullable String fragment = extractFragment(url, fragmentSeparatorIndex);
try {
URI uri = new URI(url);
if (uri.isAbsolute() && !isValidAbsoluteUrl(uri)) {
return new UrlDetails(null, null, null);
}

return new UrlDetails(baseUrl, query, fragment);
}
final @NotNull String schemeAndSeparator =
uri.getScheme() == null ? "" : (uri.getScheme() + "://");
final @NotNull String authority = uri.getRawAuthority() == null ? "" : uri.getRawAuthority();
final @NotNull String path = uri.getRawPath() == null ? "" : uri.getRawPath();
final @Nullable String query = uri.getRawQuery();
final @Nullable String fragment = uri.getRawFragment();

private static @Nullable String extractBaseUrl(
final @NotNull String url,
final int queryParamSeparatorIndex,
final int fragmentSeparatorIndex) {
if (queryParamSeparatorIndex >= 0) {
return url.substring(0, queryParamSeparatorIndex).trim();
} else if (fragmentSeparatorIndex >= 0) {
return url.substring(0, fragmentSeparatorIndex).trim();
} else {
return url;
}
}
final @NotNull String filteredUrl = schemeAndSeparator + filterUserInfo(authority) + path;

private static @Nullable String extractQuery(
final @NotNull String url,
final int queryParamSeparatorIndex,
final int fragmentSeparatorIndex) {
if (queryParamSeparatorIndex > 0) {
if (fragmentSeparatorIndex > 0 && fragmentSeparatorIndex > queryParamSeparatorIndex) {
return url.substring(queryParamSeparatorIndex + 1, fragmentSeparatorIndex).trim();
} else {
return url.substring(queryParamSeparatorIndex + 1).trim();
}
} else {
return null;
}
}

private static @Nullable String extractFragment(
final @NotNull String url, final int fragmentSeparatorIndex) {
if (fragmentSeparatorIndex > 0) {
return url.substring(fragmentSeparatorIndex + 1).trim();
} else {
return null;
return new UrlDetails(filteredUrl, query, fragment);
} catch (Exception e) {
return new UrlDetails(null, null, null);
}
}

private static @NotNull UrlDetails splitAbsoluteUrl(final @NotNull String url) {
private static boolean isValidAbsoluteUrl(final @NotNull URI uri) {
try {
final @NotNull String filteredUrl = urlWithAuthRemoved(url);
final @NotNull URL urlObj = new URL(url);
final @NotNull String baseUrl = baseUrlOnly(filteredUrl);
if (baseUrl.contains("#")) {
// url considered malformed because it has fragment
return new UrlDetails(null, null, null);
} else {
final @Nullable String query = urlObj.getQuery();
final @Nullable String fragment = urlObj.getRef();
return new UrlDetails(baseUrl, query, fragment);
}
} catch (MalformedURLException e) {
return new UrlDetails(null, null, null);
uri.toURL();
} catch (Exception e) {
return false;
}
return true;
}

private static @NotNull String urlWithAuthRemoved(final @NotNull String url) {
final @NotNull Matcher userInfoMatcher = AUTH_REGEX.matcher(url);
if (userInfoMatcher.matches() && userInfoMatcher.groupCount() == 3) {
final @NotNull String userInfoString = userInfoMatcher.group(2);
final @NotNull String replacementString =
userInfoString.contains(":")
? (SENSITIVE_DATA_SUBSTITUTE + ":" + SENSITIVE_DATA_SUBSTITUTE + "@")
: (SENSITIVE_DATA_SUBSTITUTE + "@");
return userInfoMatcher.group(1) + replacementString + userInfoMatcher.group(3);
} else {
private static @NotNull String filterUserInfo(final @NotNull String url) {
if (!url.contains("@")) {
return url;
}
}

private static @NotNull String baseUrlOnly(final @NotNull String url) {
final int queryParamSeparatorIndex = url.indexOf("?");

if (queryParamSeparatorIndex >= 0) {
return url.substring(0, queryParamSeparatorIndex).trim();
} else {
final int fragmentSeparatorIndex = url.indexOf("#");
if (fragmentSeparatorIndex >= 0) {
return url.substring(0, fragmentSeparatorIndex).trim();
} else {
return url;
}
if (url.startsWith("@")) {
return SENSITIVE_DATA_SUBSTITUTE + url;
}
final @NotNull String userInfo = url.substring(0, url.indexOf('@'));
final @NotNull String filteredUserInfo =
userInfo.contains(":")
? (SENSITIVE_DATA_SUBSTITUTE + ":" + SENSITIVE_DATA_SUBSTITUTE)
: SENSITIVE_DATA_SUBSTITUTE;
return filteredUserInfo + url.substring(url.indexOf('@'));
}

public static final class UrlDetails {
Expand Down
161 changes: 155 additions & 6 deletions sentry/src/test/java/io/sentry/util/UrlUtilsTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ class UrlUtilsTest {
}

@Test
fun `splits url without query or fragment and no authority`() {
fun `splits url without query or fragment and no user info`() {
val urlDetails = UrlUtils.parse(
"https://sentry.io"
)
Expand All @@ -161,30 +161,179 @@ class UrlUtilsTest {
assertEquals("top", urlDetails.fragment)
}

// Fragment is allowed to contain '?' according to RFC 3986
@Test
fun `no details extracted with query after fragment`() {
fun `extracts details with question mark after fragment`() {
val urlDetails = UrlUtils.parse(
"https://user:password@sentry.io#fragment?q=1&s=2&token=secret"
)
assertEquals("https://[Filtered]:[Filtered]@sentry.io", urlDetails.url)
assertNull(urlDetails.query)
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
}

@Test
fun `extracts details with question mark after fragment without user info`() {
val urlDetails = UrlUtils.parse(
"https://sentry.io#fragment?q=1&s=2&token=secret"
)
assertEquals("https://sentry.io", urlDetails.url)
assertNull(urlDetails.query)
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
}

@Test
fun `no details extracted from malformed url due to invalid protocol`() {
val urlDetails = UrlUtils.parse(
"htps://user@sentry.io#fragment?q=1&s=2&token=secret"
)
assertNull(urlDetails.url)
assertNull(urlDetails.query)
assertNull(urlDetails.fragment)
}

@Test
fun `no details extracted with query after fragment without authority`() {
fun `no details extracted from malformed url due to # symbol in fragment`() {
val urlDetails = UrlUtils.parse(
"https://sentry.io#fragment?q=1&s=2&token=secret"
"https://example.com#hello#fragment"
)
assertNull(urlDetails.url)
assertNull(urlDetails.query)
assertNull(urlDetails.fragment)
}

@Test
fun `no details extracted from malformed url`() {
fun `strips empty user info`() {
val urlDetails = UrlUtils.parse(
"htps://user@sentry.io#fragment?q=1&s=2&token=secret"
"https://@sentry.io?query=a#fragment?q=1&s=2&token=secret"
)
assertEquals("https://[Filtered]@sentry.io", urlDetails.url)
assertEquals("query=a", urlDetails.query)
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
}

@Test
fun `extracts details from relative url with leading @ symbol`() {
val urlDetails = UrlUtils.parse(
"@@sentry.io/pages/10?query=a#fragment?q=1&s=2&token=secret"
)
assertEquals("@@sentry.io/pages/10", urlDetails.url)
assertEquals("query=a", urlDetails.query)
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
}

@Test
fun `extracts details from relative url with leading question mark`() {
val urlDetails = UrlUtils.parse(
"?query=a#fragment?q=1&s=2&token=secret"
)
assertEquals("", urlDetails.url)
assertEquals("query=a", urlDetails.query)
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
}

@Test
fun `does not filter email address in path`() {
val urlDetails = UrlUtils.parseNullable(
"https://staging.server.com/api/v4/auth/password/reset/email@example.com"
)!!
assertEquals("https://staging.server.com/api/v4/auth/password/reset/email@example.com", urlDetails.url)
assertNull(urlDetails.query)
assertNull(urlDetails.fragment)
}

@Test
fun `does not filter email address in path with fragment`() {
val urlDetails = UrlUtils.parseNullable(
"https://staging.server.com/api/v4/auth/password/reset/email@example.com#top"
)!!
assertEquals("https://staging.server.com/api/v4/auth/password/reset/email@example.com", urlDetails.url)
assertNull(urlDetails.query)
assertEquals("top", urlDetails.fragment)
}

@Test
fun `does not filter email address in path with query and fragment`() {
val urlDetails = UrlUtils.parseNullable(
"https://staging.server.com/api/v4/auth/password/reset/email@example.com?a=b&c=d#top"
)!!
assertEquals("https://staging.server.com/api/v4/auth/password/reset/email@example.com", urlDetails.url)
assertEquals("a=b&c=d", urlDetails.query)
assertEquals("top", urlDetails.fragment)
}

@Test
fun `does not filter email address in query`() {
val urlDetails = UrlUtils.parseNullable(
"https://staging.server.com/?email=someone@example.com"
)!!
assertEquals("https://staging.server.com/", urlDetails.url)
assertEquals("email=someone@example.com", urlDetails.query)
}

@Test
fun `does not filter email address in fragment`() {
val urlDetails = UrlUtils.parseNullable(
"https://staging.server.com#email=someone@example.com"
)!!
assertEquals("https://staging.server.com", urlDetails.url)
assertEquals("email=someone@example.com", urlDetails.fragment)
}

@Test
fun `does not filter email address in fragment with query`() {
val urlDetails = UrlUtils.parseNullable(
"https://staging.server.com?q=a&b=c#email=someone@example.com"
)!!
assertEquals("https://staging.server.com", urlDetails.url)
assertEquals("q=a&b=c", urlDetails.query)
assertEquals("email=someone@example.com", urlDetails.fragment)
}

@Test
fun `extracts details from relative url with email in path`() {
val urlDetails = UrlUtils.parse(
"/emails/user@sentry.io?query=a&b=c#fragment?q=1&s=2&token=secret"
)
assertEquals("/emails/user@sentry.io", urlDetails.url)
assertEquals("query=a&b=c", urlDetails.query)
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
}

@Test
fun `extracts details from relative url with email in query`() {
val urlDetails = UrlUtils.parse(
"users/10?email=user@sentry.io&b=c#fragment?q=1&s=2&token=secret"
)
assertEquals("users/10", urlDetails.url)
assertEquals("email=user@sentry.io&b=c", urlDetails.query)
assertEquals("fragment?q=1&s=2&token=secret", urlDetails.fragment)
}

@Test
fun `extracts details from relative url with email in fragment`() {
val urlDetails = UrlUtils.parse(
"users/10?email=user@sentry.io&b=c#fragment?q=1&s=2&email=user@sentry.io"
)
assertEquals("users/10", urlDetails.url)
assertEquals("email=user@sentry.io&b=c", urlDetails.query)
assertEquals("fragment?q=1&s=2&email=user@sentry.io", urlDetails.fragment)
}

@Test
fun `extracts path from file url`() {
val urlDetails = UrlUtils.parse(
"file:///users/sentry/text.txt"
)
assertEquals("file:///users/sentry/text.txt", urlDetails.url)
assertNull(urlDetails.query)
assertNull(urlDetails.fragment)
}

@Test
fun `does not extract details from websockets uri`() {
val urlDetails = UrlUtils.parse(
"wss://example.com/socket"
)
assertNull(urlDetails.url)
assertNull(urlDetails.query)
Expand Down
Loading