Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions benchmark/file_basename.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
prelude: |
# frozen_string_literal: true
benchmark:
long: File.basename("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml")
long_name: File.basename("Users_george_src_github.com_ruby_ruby_benchmark_file_dirname.yml")
withext: File.basename("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml", ".yml")
88 changes: 58 additions & 30 deletions file.c
Original file line number Diff line number Diff line change
Expand Up @@ -3749,7 +3749,7 @@ strrdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc)
}

static char *
chompdirsep(const char *path, const char *end, rb_encoding *enc)
chompdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc)
{
while (path < end) {
if (isdirsep(*path)) {
Expand All @@ -3758,7 +3758,7 @@ chompdirsep(const char *path, const char *end, rb_encoding *enc)
if (path >= end) return (char *)last;
}
else {
Inc(path, end, true, enc);
Inc(path, end, mb_enc, enc);
}
}
return (char *)path;
Expand All @@ -3768,7 +3768,7 @@ char *
rb_enc_path_end(const char *path, const char *end, rb_encoding *enc)
{
if (path < end && isdirsep(*path)) path++;
return chompdirsep(path, end, enc);
return chompdirsep(path, end, true, enc);
}

static rb_encoding *
Expand Down Expand Up @@ -4088,7 +4088,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
rb_enc_associate(result, enc = fs_enc_check(result, fname));
p = pend;
}
p = chompdirsep(skiproot(buf, p), p, enc);
p = chompdirsep(skiproot(buf, p), p, true, enc);
s += 2;
}
}
Expand All @@ -4113,7 +4113,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
}
else
#endif /* defined DOSISH || defined __CYGWIN__ */
p = chompdirsep(skiproot(buf, p), p, enc);
p = chompdirsep(skiproot(buf, p), p, true, enc);
}
else {
size_t len;
Expand Down Expand Up @@ -4656,7 +4656,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum
root_found:
RSTRING_GETMEM(resolved, prefixptr, prefixlen);
pend = prefixptr + prefixlen;
ptr = chompdirsep(prefixptr, pend, enc);
ptr = chompdirsep(prefixptr, pend, true, enc);
if (ptr < pend) {
prefixlen = ++ptr - prefixptr;
rb_str_set_len(resolved, prefixlen);
Expand Down Expand Up @@ -4910,22 +4910,31 @@ rmext(const char *p, long l0, long l1, const char *e, long l2, rb_encoding *enc)
return 0;
}

const char *
ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc)
static inline const char *
enc_find_basename(const char *name, long *baselen, long *alllen, bool mb_enc, rb_encoding *enc)
{
const char *p, *q, *e, *end;
#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC
const char *root;
#endif
long f = 0, n = -1;

end = name + (alllen ? (size_t)*alllen : strlen(name));
name = skipprefix(name, end, true, enc);
long len = (alllen ? (size_t)*alllen : strlen(name));

if (len <= 0) {
return name;
}

end = name + len;
name = skipprefix(name, end, mb_enc, enc);
#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC
root = name;
#endif
while (isdirsep(*name))

while (isdirsep(*name)) {
name++;
}

if (!*name) {
p = name - 1;
f = 1;
Expand All @@ -4947,32 +4956,47 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin
#endif /* defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC */
}
else {
if (!(p = strrdirsep(name, end, true, enc))) {
p = strrdirsep(name, end, mb_enc, enc);
if (!p) {
p = name;
}
else {
while (isdirsep(*p)) p++; /* skip last / */
while (isdirsep(*p)) {
p++; /* skip last / */
}
}
#if USE_NTFS
n = ntfs_tail(p, end, enc) - p;
#else
n = chompdirsep(p, end, enc) - p;
n = chompdirsep(p, end, mb_enc, enc) - p;
#endif
for (q = p; q - p < n && *q == '.'; q++);
for (e = 0; q - p < n; Inc(q, end, true, enc)) {
for (e = 0; q - p < n; Inc(q, end, mb_enc, enc)) {
if (*q == '.') e = q;
}
if (e) f = e - p;
else f = n;
if (e) {
f = e - p;
}
else {
f = n;
}
}

if (baselen)
if (baselen) {
*baselen = f;
if (alllen)
}
if (alllen) {
*alllen = n;
}
return p;
}

/*
 * Externally visible wrapper around enc_find_basename().
 *
 * name, baselen, alllen and enc are forwarded unchanged; mb_enc is always
 * passed as true, so callers of this entry point never get the
 * mb_enc == false variant.
 * NOTE(review): mb_enc presumably selects encoding-aware (multibyte)
 * character stepping in the helpers it is handed to -- confirm.
 *
 * Returns whatever enc_find_basename() returns.
 */
const char *
ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc)
{
return enc_find_basename(name, baselen, alllen, true, enc);
}

/*
* call-seq:
* File.basename(file_name [, suffix] ) -> base_name
Expand All @@ -4993,7 +5017,7 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin
static VALUE
rb_file_s_basename(int argc, VALUE *argv, VALUE _)
{
VALUE fname, fext, basename;
VALUE fname, fext;
const char *name, *p;
long f, n;
rb_encoding *enc;
Expand All @@ -5006,15 +5030,19 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _)
enc = rb_str_enc_get(fext);
}
fname = argv[0];
FilePathStringValue(fname);
CheckPath(fname, name);
if (NIL_P(fext) || !(enc = rb_enc_compatible(fname, fext))) {
enc = rb_enc_get(fname);
enc = rb_str_enc_get(fname);
fext = Qnil;
}
if ((n = RSTRING_LEN(fname)) == 0 || !*(name = RSTRING_PTR(fname)))
return rb_str_new_shared(fname);

p = ruby_enc_find_basename(name, &f, &n, enc);
n = RSTRING_LEN(fname);
if (n == 0 || !*name) {
    /*
     * Nothing to scan: the basename of an empty path is an empty string
     * in the resolved encoding.  The result must be returned here;
     * otherwise the new string is discarded and control falls through to
     * enc_find_basename(), which bails out early on an empty input
     * without storing *baselen, leaving f unset for the code below.
     */
    return rb_enc_str_new(0, 0, enc);
}

bool mb_enc = !rb_str_encindex_fastpath(rb_enc_to_index(enc));
p = enc_find_basename(name, &f, &n, mb_enc, enc);
if (n >= 0) {
if (NIL_P(fext)) {
f = n;
Expand All @@ -5027,12 +5055,12 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _)
}
RB_GC_GUARD(fext);
}
if (f == RSTRING_LEN(fname)) return rb_str_new_shared(fname);
if (f == RSTRING_LEN(fname)) {
return rb_str_new_shared(fname);
}
}

basename = rb_str_new(p, f);
rb_enc_copy(basename, fname);
return basename;
return rb_enc_str_new(p, f, enc);
}

static VALUE rb_file_dirname_n(VALUE fname, int n);
Expand Down Expand Up @@ -5350,7 +5378,7 @@ rb_file_join_ary(VALUE ary)
rb_enc_copy(result, tmp);
}
else {
tail = chompdirsep(name, name + len, rb_enc_get(result));
tail = chompdirsep(name, name + len, true, rb_enc_get(result));
if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) {
rb_str_set_len(result, tail - name);
}
Expand Down
8 changes: 7 additions & 1 deletion internal/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@ enum ruby_rstring_private_flags {
static inline bool
rb_str_encindex_fastpath(int encindex)
{
// The overwhelming majority of strings are in one of these 3 encodings.
// The overwhelming majority of strings are in one of these 3 encodings,
// which are all either ASCII or perfect ASCII supersets.
// Hence you can use fast, single byte algorithms on them, such as `memchr` etc,
// without all the overhead of fetching the rb_encoding and using functions such as
// rb_enc_mbminlen etc.
// Many other encodings could qualify, but they are expected to be rare occurrences,
// so it's better to keep that list small.
switch (encindex) {
case ENCINDEX_ASCII_8BIT:
case ENCINDEX_UTF_8:
Expand Down
5 changes: 0 additions & 5 deletions lib/rubygems/ext/builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,6 @@ def initialize(spec, build_args = spec.build_args, target_rbconfig = Gem.target_
@gem_dir = spec.full_gem_path
@target_rbconfig = target_rbconfig
@build_jobs = build_jobs

@ran_rake = false
end

##
Expand All @@ -177,7 +175,6 @@ def builder_for(extension) # :nodoc:
when /configure/ then
Gem::Ext::ConfigureBuilder
when /rakefile/i, /mkrf_conf/i then
@ran_rake = true
Gem::Ext::RakeBuilder
when /CMakeLists.txt/ then
Gem::Ext::CmakeBuilder.new
Expand Down Expand Up @@ -250,8 +247,6 @@ def build_extensions
FileUtils.rm_f @spec.gem_build_complete_path

@spec.extensions.each do |extension|
break if @ran_rake

build_extension extension, dest_path
end

Expand Down
26 changes: 26 additions & 0 deletions spec/ruby/core/file/basename_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,34 @@
File.basename("c:\\bar.txt", ".*").should == "bar"
File.basename("c:\\bar.txt.exe", ".*").should == "bar.txt"
end

it "handles Shift JIS 0x5C (\\) as second byte of a multi-byte sequence" do
  # Path under test: dir\fileソname.txt
  # The bytes 0x83 0x5C form one Shift JIS character whose second byte has
  # the same value as an ASCII backslash.
  sjis = Encoding::SHIFT_JIS
  expected = "file\x83\x5cname.txt".b.force_encoding(sjis)
  path = "dir\\file\x83\x5cname.txt".b.force_encoding(sjis)

  path.valid_encoding?.should be_true
  File.basename(path).should == expected
end
end

it "rejects strings encoded with non ASCII-compatible encodings" do
  # Candidates: every encoding that is neither ASCII-compatible nor a dummy.
  incompatible = Encoding.list.reject { |candidate| candidate.ascii_compatible? || candidate.dummy? }

  incompatible.each do |enc|
    # Encodings with no converter from the literal's encoding are skipped.
    path = begin
      "/foo/bar".encode(enc)
    rescue Encoding::ConverterNotFoundError
      next
    end

    -> { File.basename(path) }.should raise_error(Encoding::CompatibilityError)
  end
end

it "works with all ASCII-compatible encodings" do
  # For each ASCII-compatible encoding, the basename of "/foo/bar" must
  # equal "bar" encoded the same way.
  ascii_compatible = Encoding.list.select { |candidate| candidate.ascii_compatible? }

  ascii_compatible.each do |enc|
    source = "/foo/bar".encode(enc)
    expected = "bar".encode(enc)
    File.basename(source).should == expected
  end
end

it "returns the extension for a multibyte filename" do
File.basename('/path/Офис.m4a').should == "Офис.m4a"
Expand Down
53 changes: 52 additions & 1 deletion test/rubygems/test_gem_ext_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def setup

@spec = util_spec "a"

@builder = Gem::Ext::Builder.new @spec, ""
@builder = Gem::Ext::Builder.new @spec
end

def teardown
Expand Down Expand Up @@ -201,6 +201,57 @@ def test_build_extensions_install_ext_only
Gem.configuration.install_extension_in_lib = @orig_install_extension_in_lib
end

# Builds a spec that lists two extensions (a Rakefile and an extconf.rb) and
# asserts on the files that exist after build_extensions has run.
def test_build_multiple_extensions
# Skipped on TruffleRuby and when running inside the ruby/ruby repository.
pend if RUBY_ENGINE == "truffleruby"
pend "terminates on ruby/ruby" if ruby_repo?

extension_in_lib do
# Register both extensions; the Rakefile entry is listed first.
@spec.extensions << "ext/Rakefile"
@spec.extensions << "ext/extconf.rb"

ext_dir = File.join @spec.gem_dir, "ext"

FileUtils.mkdir_p ext_dir

extconf_rb = File.join ext_dir, "extconf.rb"
rakefile = File.join ext_dir, "Rakefile"

# Minimal extconf.rb: only generates a Makefile for extension 'a'.
File.open extconf_rb, "w" do |f|
f.write <<-'RUBY'
require 'mkmf'

create_makefile 'a'
RUBY
end

# Rakefile whose default task touches ext/foo; that file is asserted on below.
# This heredoc is interpolated, so ext_dir is baked into the written Rakefile.
File.open rakefile, "w" do |f|
f.write <<-RUBY
task :default do
FileUtils.touch File.join "#{ext_dir}", 'foo'
end
RUBY
end

# Fixture files under ext/lib: a.rb and a/b.rb.
ext_lib_dir = File.join ext_dir, "lib"
FileUtils.mkdir ext_lib_dir
FileUtils.touch File.join ext_lib_dir, "a.rb"
FileUtils.mkdir File.join ext_lib_dir, "a"
FileUtils.touch File.join ext_lib_dir, "a", "b.rb"

use_ui @ui do
@builder.build_extensions
end

# Expected results after the build: the extension dir and the
# build-complete marker exist, the Rakefile task left ext/foo behind,
# and a.rb / a/b.rb are present in the extension dir and the gem's lib dir.
# NOTE(review): gem_make.out is presumably the builder's captured output -- confirm.
assert_path_exist @spec.extension_dir
assert_path_exist @spec.gem_build_complete_path
assert_path_exist File.join @spec.gem_dir, "ext", "foo"
assert_path_exist File.join @spec.extension_dir, "gem_make.out"
assert_path_exist File.join @spec.extension_dir, "a.rb"
assert_path_exist File.join @spec.gem_dir, "lib", "a.rb"
assert_path_exist File.join @spec.gem_dir, "lib", "a", "b.rb"
end
end

def test_build_extensions_none
use_ui @ui do
@builder.build_extensions
Expand Down