Skip to content

Commit be33216

Browse files
committed
Header validation changes for Tind Spreadsheet validator
1 parent faedf23 commit be33216

5 files changed

Lines changed: 46 additions & 16 deletions

File tree

app/lib/tind_spread/spread_tool.rb

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,24 +75,28 @@ def find_largest(ffts)
7575
highest
7676
end
7777

78-
# rubocop:disable Lint/UselessAssignment
78+
# rubocop:disable Lint/UselessAssignment, Metrics/AbcSize
7979
def get_files(file_pattern)
8080
dir = "#{Rails.application.config.tind_data_root_dir}/#{@directory.delete_prefix('/')}"
81-
if file_pattern.nil?
81+
if file_pattern.nil? || !file_pattern.respond_to?(:gsub)
8282
matches = []
8383
else
8484
file_match = file_pattern.gsub(/\.tif/i, '')
8585
matches = File.directory?("#{dir}/#{file_pattern}") ? Dir.glob("#{dir}/*/#{file_match}*") : Dir.glob("#{dir}/#{file_match}*")
8686
matches.map! { |i| i.gsub(Rails.application.config.tind_data_root_dir, 'https://digitalassets.lib.berkeley.edu') }
8787
end
8888
end
89-
# rubocop:enable Lint/UselessAssignment
89+
# rubocop:enable Lint/UselessAssignment, Metrics/AbcSize
9090

9191
def get_ffts(all_rows)
9292
ffts = []
9393
all_rows.each do |row_data|
9494
file_pattern = get_filename(row_data)
95-
ffts << urls_to_fft(get_files(file_pattern))
95+
ffts << if file_pattern.is_a?(String) && file_pattern.present?
96+
urls_to_fft(get_files(file_pattern))
97+
else
98+
{}
99+
end
96100
end
97101
ffts
98102
end

app/lib/tind_spread/tind_validation.rb

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,16 @@ module TindValidation
77
# validates the header row
88
# should be 3 digits for field, 2 for indicator (can be _, number), one digit or number for subfield
99
# optionally can have a ('-' followed by a number). This is used to group columns into similar fields
10-
# the header row can also be just "Filename" or "FFT". The program will create the proper fields for those
10+
# the header row can also be just "Filename" or "001". The program will create the proper fields for those
1111
def self.valid_header?(str)
12-
str.match?(/\d{3}[_|\d]{2}[a-zA-Z0-9](-\d+)?$/) || str.match?(/Filename|FFT/i)
12+
valid_patterns = [
13+
/^\d+:001/,
14+
/^\d+:\d{3}[_|\d]{2}[a-zA-Z0-9](-\d+)?$/,
15+
/^\d+:Filename$/i,
16+
/FFT/
17+
]
18+
19+
valid_patterns.any? { |pattern| pattern.match?(str) }
1320
end
1421

1522
# runs a set of validations against a single row.

spec/lib/tind_spread/spread_tool_spec.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,22 @@
8585
expect(spread_tool.spread_to_hash(header)).to eq(expected_result)
8686
end
8787
end
88+
89+
describe '#get_ffts' do
90+
it 'skips get_files when the filename is blank' do
91+
row_data = [{ '0:Filename' => nil, '1:Header2' => 'Data2' }]
92+
93+
expect(spread_tool).not_to receive(:get_files)
94+
95+
expect(spread_tool.get_ffts(row_data)).to eq([{}])
96+
end
97+
98+
it 'skips get_files when the filename is a hash' do
99+
row_data = [{ '0:Filename' => { name: 'Data1' }, '1:Header2' => 'Data2' }]
100+
101+
expect(spread_tool).not_to receive(:get_files)
102+
103+
expect(spread_tool.get_ffts(row_data)).to eq([{}])
104+
end
105+
end
88106
end

spec/lib/tind_spread/tind_batch_spec.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
let(:args) { { directory:, '982__a': 'test' } }
1616
let(:tind_batch) { described_class.new(args, xlsx, extension, email) }
1717
let(:spread_tool) { instance_double(TindSpread::SpreadTool) }
18-
let(:all_rows) { [{ '001__a' => 'Data1', '245__a' => 'Data2' }, { '001__a' => 'Data3', '245__a' => 'Data4' }] }
18+
let(:all_rows) { [{ '1:001__a' => 'Data1', '1:245__a' => 'Data2' }, { '1:001__a' => 'Data3', '1:245__a' => 'Data4' }] }
1919

2020
before do
2121
allow(TindSpread::SpreadTool).to receive(:new).with(xlsx, extension, directory).and_return(spread_tool)
@@ -76,7 +76,7 @@
7676

7777
describe '#validate_header_row' do
7878
it 'returns an empty array for valid headers' do
79-
headers = %w[001__a 245__a 500__3]
79+
headers = %w[1:001__a 1:245__a 1:500__3]
8080
errors = tind_batch.validate_header_row(headers)
8181
expect(errors).to be_empty
8282
end
@@ -89,7 +89,7 @@
8989
end
9090

9191
it 'returns errors only for invalid headers in a mixed list' do
92-
headers = %w[001__a InvalidHeader 245__a]
92+
headers = %w[1:001__a InvalidHeader 1:245__a]
9393
errors = tind_batch.validate_header_row(headers)
9494
expect(errors).to include('Invalid header name: InvalidHeader')
9595
expect(errors.length).to eq(1)

spec/lib/tind_spread/tind_validation_spec.rb

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,16 +111,17 @@
111111

112112
describe '.valid_header?' do
113113
it 'returns true for a valid header with standard format' do
114-
expect(described_class.valid_header?('001__a')).to be true
115-
expect(described_class.valid_header?('245__a')).to be true
116-
expect(described_class.valid_header?('Filename')).to be true
117-
expect(described_class.valid_header?('100_1a')).to be true
114+
expect(described_class.valid_header?('1:001')).to be true
115+
expect(described_class.valid_header?('1:001__a')).to be true
116+
expect(described_class.valid_header?('1:245__a')).to be true
117+
expect(described_class.valid_header?('1:Filename')).to be true
118+
expect(described_class.valid_header?('1:100_1a')).to be true
118119
end
119120

120121
it 'returns true for a valid header with suffix format' do
121-
expect(described_class.valid_header?('001__a-1')).to be true
122-
expect(described_class.valid_header?('245__a-2')).to be true
123-
expect(described_class.valid_header?('500__3-5')).to be true
122+
expect(described_class.valid_header?('1:001__a-1')).to be true
123+
expect(described_class.valid_header?('1:245__a-2')).to be true
124+
expect(described_class.valid_header?('1:500__3-5')).to be true
124125
end
125126

126127
it 'returns false for invalid headers' do

0 commit comments

Comments
 (0)