Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions httpcore5/src/main/java/org/apache/hc/core5/http/HttpHost.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.apache.hc.core5.net.Host;
import org.apache.hc.core5.net.NamedEndpoint;
import org.apache.hc.core5.net.URIAuthority;
import org.apache.hc.core5.net.ZoneIdSupport;
import org.apache.hc.core5.util.Args;
import org.apache.hc.core5.util.LangUtils;
import org.apache.hc.core5.util.TextUtils;
Expand Down Expand Up @@ -303,13 +304,24 @@ public InetAddress getAddress() {
*/
public String toURI() {
final StringBuilder buffer = new StringBuilder();
buffer.append(this.schemeName);
buffer.append("://");
buffer.append(this.host.toString());
buffer.append(this.schemeName).append("://");

final String hostname = this.host.getHostName();
final int port = this.host.getPort();

// Bracket only real IPv6 literals; decide using the address part only (ignore zone)
if (ZoneIdSupport.looksLikeIPv6AddressPart(hostname)) {
ZoneIdSupport.appendBracketedIPv6(buffer, hostname);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arturobernalg If I understand it right #isIPv6AddressPart will be executed twice. Is this needed?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arturobernalg #isIPv6AddressPart still gets called twice. Once here, and another time inside #appendBracketedIPv6

if (port >= 0) {
buffer.append(':').append(port);
}
} else {
// reg-name / IPv4 / special forms like "host:80" for CONNECT
buffer.append(this.host);
}
return buffer.toString();
}


/**
* Obtains the host string, without scheme prefix.
*
Expand Down
83 changes: 75 additions & 8 deletions httpcore5/src/main/java/org/apache/hc/core5/net/URIAuthority.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,19 @@ public final class URIAuthority implements NamedEndpoint, Serializable {
private final String userInfo;
private final Host host;

/**
 * Parses a URI authority from the start of the given character sequence.
 *
 * @param s the text to parse
 * @return the parsed authority, or {@code null} when {@code s} is blank
 * @throws URISyntaxException propagated from the cursor-based {@code parse} overload
 */
static URIAuthority parse(final CharSequence s) throws URISyntaxException {
    if (!TextUtils.isBlank(s)) {
        // Content left over after the authority is deliberately tolerated here:
        // there is intentionally no cursor.atEnd() check.
        return parse(s, new Tokenizer.Cursor(0, s.length()));
    }
    // Blank input carries no authority at all.
    return null;
}

static URIAuthority parse(final CharSequence s, final Tokenizer.Cursor cursor) throws URISyntaxException {
final Tokenizer tokenizer = Tokenizer.INSTANCE;
String userInfo = null;

// optional userinfo@
final int initPos = cursor.getPos();
final String token = tokenizer.parseContent(s, cursor, URISupport.HOST_DELIMITERS);
if (!cursor.atEnd() && s.charAt(cursor.getPos()) == '@') {
Expand All @@ -62,26 +72,83 @@ static URIAuthority parse(final CharSequence s, final Tokenizer.Cursor cursor) t
userInfo = token;
}
} else {
//Rewind
cursor.updatePos(initPos);
}

if (!cursor.atEnd() && s.charAt(cursor.getPos()) == '[') {
Copy link
Member

@ok2c ok2c Oct 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arturobernalg God Merciful. Do we really need to do all this? I have been trying to reduce our security footprint outside of our direct area of responsibility. It is just a matter of time before some "security professionals" or "security researchers" start crawling up your rectum claiming this code is potentially vulnerable to exploits by green men from Mars and demand a CVE with their name on it. I understand, we all have to make a living, and so do security researchers, but do we really need all that?

Again, why are we doing this? Do we really need to parse IPv6 addresses? Really.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ok2c No IPv6 parsing—just bracket check + RFC 6874 zone encode/decode in one tiny helper; minimal surface, CONNECT untouched.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arturobernalg Do not see those changes. There are also conflicts in the branch. Please double-check.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ok2c should be fine now

final int lb = cursor.getPos();
final int upper = cursor.getUpperBound();
int rb = -1;
for (int i = lb + 1; i < upper; i++) {
if (s.charAt(i) == ']') { rb = i; break; }
}
if (rb < 0) {
throw URISupport.createException(s.toString(), cursor, "Expected closing bracket for IPv6 address");
}
final String literal = s.subSequence(lb + 1, rb).toString();
final int z = literal.indexOf("%25");
final String addrPart = z >= 0 ? literal.substring(0, z) : literal;

// Minimal check: IPv6-like must have at least two colons
int colons = 0;
for (int i = 0; i < addrPart.length(); i++) {
if (addrPart.charAt(i) == ':' && ++colons >= 2) break;
}
if (colons < 2) {
throw URISupport.createException(s.toString(), cursor, "Expected an IPv6 address");
}

if (z >= 0) {
ZoneIdSupport.validateZoneIdEncoded(literal.substring(z + 3));
}
final String hostName = ZoneIdSupport.decodeZoneId(literal); // "...%25zone" → "...%zone"

// optional :port
int pos = rb + 1;
int port = -1;
if (pos < upper && s.charAt(pos) == ':') {
pos++;
if (pos >= upper || !Character.isDigit(s.charAt(pos))) {
throw URISupport.createException(s.toString(), cursor, "Invalid port");
}
long acc = 0;
while (pos < upper && Character.isDigit(s.charAt(pos))) {
acc = acc * 10 + (s.charAt(pos) - '0');
if (acc > 65535) {
throw URISupport.createException(s.toString(), cursor, "Port out of range");
}
pos++;
}
port = (int) acc;
}
cursor.updatePos(pos);
return new URIAuthority(userInfo, hostName, port);
}

// Non-bracketed authority → existing fallback.
final Host host = Host.parse(s, cursor);
return new URIAuthority(userInfo, host);
}

static URIAuthority parse(final CharSequence s) throws URISyntaxException {
final Tokenizer.Cursor cursor = new Tokenizer.Cursor(0, s.length());
return parse(s, cursor);
}


static void format(final StringBuilder buf, final URIAuthority uriAuthority) {
if (uriAuthority.getUserInfo() != null) {
buf.append(uriAuthority.getUserInfo());
buf.append("@");
buf.append(uriAuthority.getUserInfo()).append("@");
}
final String hostName = uriAuthority.getHostName();
final int port = uriAuthority.getPort();

if (ZoneIdSupport.appendBracketedIPv6(buf, hostName)) {
if (port >= 0) {
buf.append(':').append(port);
}
} else {
Host.format(buf, uriAuthority);
}
Host.format(buf, uriAuthority);
}


static String format(final URIAuthority uriAuthority) {
final StringBuilder buf = new StringBuilder();
format(buf, uriAuthority);
Expand Down
12 changes: 9 additions & 3 deletions httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -424,14 +424,16 @@ private String buildString() {
}
sb.append("@");
}
if (InetAddressUtils.isIPv6(this.host)) {
sb.append("[").append(this.host).append("]");

// Bracket only true IPv6 hosts; decide based on address part only (ignore zone)
if (ZoneIdSupport.appendBracketedIPv6(sb, this.host)) {
// wrote [IPv6%25zone]
} else {
PercentCodec.encode(sb, this.host, this.charset,
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.REG_NAME, false);
}
if (this.port >= 0) {
sb.append(":").append(this.port);
sb.append(':').append(this.port);
}
authoritySpecified = true;
} else {
Expand Down Expand Up @@ -478,6 +480,10 @@ private void digestURI(final URI uri, final Charset charset) {
this.host = uriHost != null && InetAddressUtils.isIPv6URLBracketed(uriHost)
? uriHost.substring(1, uriHost.length() - 1)
: uriHost;

// Normalize zone-id to user-friendly form: "...%25zone" -> "...%zone" (and decode %HH in zone)
this.host = ZoneIdSupport.decodeZoneId(this.host);

this.port = uri.getPort();
this.encodedUserInfo = uri.getRawUserInfo();
this.userInfo = uri.getUserInfo();
Expand Down
205 changes: 205 additions & 0 deletions httpcore5/src/main/java/org/apache/hc/core5/net/ZoneIdSupport.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
package org.apache.hc.core5.net;

import org.apache.hc.core5.annotation.Internal;
import org.apache.hc.core5.util.TextUtils;

/**
 * Helpers for the ZoneID part of IPv6 literals in URIs (RFC 6874).
 * <p>
 * Two textual forms are handled here:
 * </p>
 * <ul>
 *   <li>the URI form found between brackets, {@code addr%25<encoded-zone>};</li>
 *   <li>the internal form, {@code addr%<decoded-zone>}.</li>
 * </ul>
 * <p>
 * No IPv6 address parsing or validation is performed. A host is considered
 * IPv6-like purely by counting colons in the part preceding the first
 * {@code '%'}.
 * </p>
 */
@Internal
public final class ZoneIdSupport {

    /** Uppercase hex digits used when emitting percent-encoded octets. */
    private static final char[] HEX_UPPER = "0123456789ABCDEF".toCharArray();

    private ZoneIdSupport() {
    }

    /**
     * RFC 6874 encoder for ZoneID: emits unreserved characters as-is and percent-encodes
     * everything else using UTF-8 with UPPERCASE hex digits. Existing {@code %HH} triplets
     * are passed through unchanged (their hex digit case is preserved).
     * <p>
     * Encoding is done per Unicode code point, so a supplementary character given as a
     * surrogate pair is emitted as its single four-octet UTF-8 sequence rather than as
     * two independently (and wrongly) encoded halves.
     * </p>
     *
     * @param raw the decoded ZoneID, may be {@code null}
     * @return the encoded ZoneID; {@code null} if {@code raw} is {@code null},
     *         the empty string if {@code raw} is empty
     */
    public static String encodeZoneIdRfc6874(final CharSequence raw) {
        if (raw == null) {
            return null;
        }
        final int len = raw.length();
        if (len == 0) {
            return raw.toString();
        }
        final StringBuilder out = new StringBuilder(len + 8);
        int i = 0;
        while (i < len) {
            final char ch = raw.charAt(i);
            if (unreserved(ch)) {
                out.append(ch);
                i++;
            } else if (isPctTriplet(raw, i, len)) {
                // pass through existing %HH
                out.append(raw, i, i + 3);
                i += 3;
            } else {
                // Read a whole code point so that surrogate pairs are encoded together.
                final int codePoint = Character.codePointAt(raw, i);
                for (final byte b : utf8Bytes(codePoint)) {
                    out.append('%').append(HEX_UPPER[(b >> 4) & 0x0F]).append(HEX_UPPER[b & 0x0F]);
                }
                i += Character.charCount(codePoint);
            }
        }
        return out.toString();
    }

    /**
     * RFC 6874 decoder for bracket contents of an IPv6 literal.
     * Input: {@code "addr%25<enc-zone>"} → Output internal form: {@code "addr%<decoded-zone>"}.
     * If there is no {@code "%25"} delimiter, returns the input as-is.
     * <p>
     * Within the zone, valid {@code %HH} triplets are decoded to octets and the resulting
     * octet sequence is interpreted as UTF-8. Characters that are not part of a valid
     * triplet are copied through; non-ASCII characters contribute their UTF-8 octets
     * instead of being truncated to a single byte.
     * </p>
     *
     * @param host the bracket contents, may be {@code null}
     * @return the internal form, or {@code null} if {@code host} is {@code null}
     */
    public static String decodeZoneId(final CharSequence host) {
        if (host == null) {
            return null;
        }
        final int len = host.length();
        // find the first "%25"
        int delim = -1;
        for (int i = 0; i + 2 < len; i++) {
            if (host.charAt(i) == '%' && host.charAt(i + 1) == '2' && host.charAt(i + 2) == '5') {
                delim = i;
                break;
            }
        }
        if (delim < 0) {
            return host.toString();
        }

        final java.io.ByteArrayOutputStream zoneBytes =
                new java.io.ByteArrayOutputStream(len - delim - 3);
        int i = delim + 3;
        while (i < len) {
            final char ch = host.charAt(i);
            if (isPctTriplet(host, i, len)) {
                final int hi = Character.digit(host.charAt(i + 1), 16);
                final int lo = Character.digit(host.charAt(i + 2), 16);
                zoneBytes.write((hi << 4) + lo);
                i += 3;
            } else if (ch < 0x80) {
                // ASCII (including a stray '%'): one char is exactly one UTF-8 octet
                zoneBytes.write(ch);
                i++;
            } else {
                // Non-ASCII literal: keep it intact by writing its UTF-8 octets.
                final int codePoint = Character.codePointAt(host, i);
                final byte[] encoded = utf8Bytes(codePoint);
                zoneBytes.write(encoded, 0, encoded.length);
                i += Character.charCount(codePoint);
            }
        }
        final String zone = new String(zoneBytes.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
        return new StringBuilder(delim + 1 + zone.length())
                .append(host, 0, delim)
                .append('%')
                .append(zone)
                .toString();
    }

    /**
     * RFC 6874 ZoneID validator:
     * <pre>ZoneID = 1*( unreserved / pct-encoded )</pre>
     *
     * @param enc the encoded ZoneID (the text following {@code "%25"})
     * @throws IllegalArgumentException if {@code enc} is {@code null}, empty, or contains
     *         anything other than unreserved characters and {@code %HH} triplets
     */
    public static void validateZoneIdEncoded(final CharSequence enc) {
        if (enc == null || enc.length() == 0) {
            throw new IllegalArgumentException("ZoneID must not be empty");
        }
        final int len = enc.length();
        int i = 0;
        while (i < len) {
            if (unreserved(enc.charAt(i))) {
                i++;
            } else if (isPctTriplet(enc, i, len)) {
                i += 3;
            } else {
                throw new IllegalArgumentException("Illegal character in ZoneID");
            }
        }
    }

    /**
     * Heuristic: returns {@code true} if {@code host} looks like an IPv6 address-part
     * (i.e., before any ZoneID) by counting colons. We do not parse/validate IPv6;
     * this keeps our surface minimal while still bracketing correctly.
     * <p>Rule: if the address-part (up to '%', if present) contains &gt;= 2 colons,
     * treat it as IPv6-like.</p>
     *
     * @param host the host text, may be {@code null}
     * @return {@code true} if the address part contains at least two colons
     */
    public static boolean looksLikeIPv6AddressPart(final CharSequence host) {
        if (host == null) {
            return false;
        }
        final int zoneIdx = indexOfZoneDelimiter(host);
        return hasAtLeastTwoColons(host, zoneIdx >= 0 ? zoneIdx : host.length());
    }

    /**
     * Appends a bracketed IPv6 literal to {@code buf} if {@code host} looks like IPv6.
     * If a ZoneID is present (after '%'), it is written as {@code "%25"} followed by the
     * RFC 6874-encoded ZoneID. Nothing is appended when {@code host} does not look like
     * IPv6, so callers can use the return value as the only check.
     *
     * @param buf  the buffer to append to
     * @param host the host in internal form, may be {@code null}
     * @return {@code true} iff the bracketed literal was written
     */
    public static boolean appendBracketedIPv6(final StringBuilder buf, final CharSequence host) {
        if (host == null) {
            return false;
        }
        // Locate the zone delimiter once and reuse it for both the check and the output.
        final int zoneIdx = indexOfZoneDelimiter(host);
        final int addrEnd = zoneIdx >= 0 ? zoneIdx : host.length();
        if (!hasAtLeastTwoColons(host, addrEnd)) {
            return false;
        }
        buf.append('[').append(host, 0, addrEnd);
        if (zoneIdx >= 0) {
            buf.append("%25").append(encodeZoneIdRfc6874(host.subSequence(zoneIdx + 1, host.length())));
        }
        buf.append(']');
        return true;
    }

    /**
     * Returns the index of the first {@code '%'} in {@code host}, or {@code -1} if none.
     */
    private static int indexOfZoneDelimiter(final CharSequence host) {
        for (int i = 0; i < host.length(); i++) {
            if (host.charAt(i) == '%') {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns {@code true} if {@code host[0, end)} contains two or more colons.
     */
    private static boolean hasAtLeastTwoColons(final CharSequence host, final int end) {
        int colons = 0;
        for (int i = 0; i < end; i++) {
            if (host.charAt(i) == ':' && ++colons >= 2) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns {@code true} if a {@code %HH} triplet starts at {@code pos} and lies
     * entirely before {@code end}.
     */
    private static boolean isPctTriplet(final CharSequence s, final int pos, final int end) {
        return s.charAt(pos) == '%'
                && pos + 2 < end
                && TextUtils.isHex(s.charAt(pos + 1))
                && TextUtils.isHex(s.charAt(pos + 2));
    }

    /**
     * Returns the UTF-8 encoding of a single code point.
     */
    private static byte[] utf8Bytes(final int codePoint) {
        return new String(Character.toChars(codePoint)).getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * RFC 3986 unreserved characters.
     */
    private static boolean unreserved(final char ch) {
        return ch >= 'A' && ch <= 'Z'
                || ch >= 'a' && ch <= 'z'
                || ch >= '0' && ch <= '9'
                || ch == '-' || ch == '.' || ch == '_' || ch == '~';
    }
}
Loading