Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions .github/workflows/_publish-code.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@ on:
workflow_dispatch:

jobs:
linting:
publish:
name: Publish to Rubygems
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
with:
submodules: recursive

- name: Set up Ruby
uses: ruby/setup-ruby@v1
with:
Expand All @@ -28,10 +27,13 @@ jobs:
echo -e "---\n:rubygems_api_key: ${RUBYGEMS_API_KEY}" > ~/.gem/credentials
chmod 600 ~/.gem/credentials

- name: Build the Gem
- name: Build the Gems
run: |
gem build --strict --output mindee.gem
gem build mindee.gemspec --strict --output mindee.gem
gem build mindee-lite.gemspec --strict --output mindee-lite.gem

- name: Publish the Gem
- name: Publish the Gems
run: |
# Push both generated files to Rubygems
gem push mindee.gem
gem push mindee-lite.gem
50 changes: 50 additions & 0 deletions .github/workflows/_test-integration-lite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# Run integration tests against the minimal-dependency (mindee-lite) gem.
#
name: Integration Tests - Minimal Dependencies

on:
workflow_call:
workflow_dispatch:

env:
MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }}
MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }}
MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}
MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }}
MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}
MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }}
MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }}
MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }}
MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }}
MINDEE_LOG_LEVEL: DEBUG
MINDEE_GEM_NAME: mindee-lite

jobs:
integration-tests:
name: Run Integration Tests
timeout-minutes: 30
runs-on: ${{ matrix.os }}
strategy:
max-parallel: 3
matrix:
os:
- "ubuntu-24.04"
- "macos-latest"
ruby:
- "4.0"
steps:
- uses: actions/checkout@v5
with:
submodules: recursive

- name: set up Ruby ${{ matrix.ruby }}
uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby }}
bundler-cache: true

- name: Run Rspec for integration tests
run: |
bundle exec rake integration
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# Run integration tests.
#
name: Integration Tests
name: Integration Tests - All Dependencies

on:
workflow_call:
Expand Down
40 changes: 40 additions & 0 deletions .github/workflows/_test-units-lite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#
# Run lite unit tests.
#
name: Unit Tests - Minimal Dependencies

on:
workflow_call:

env:
MINDEE_GEM_NAME: mindee-lite

jobs:
tests:
name: Run Unit Tests
timeout-minutes: 30
runs-on: ${{ matrix.os }}
strategy:
matrix:
os:
- "ubuntu-24.04"
- "macos-latest"
ruby:
- "3.2"
- "4.0"
steps:
- uses: actions/checkout@v5
with:
submodules: recursive

- name: set up Ruby ${{ matrix.ruby }}
uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby }}
bundler-cache: true

- name: Run Rspec
env:
MINDEE_LOG_LEVEL: DEBUG
run: |
bundle exec rake spec
5 changes: 2 additions & 3 deletions .github/workflows/_test-units.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# Run unit tests.
# Run full unit tests.
#
name: Tests
name: Unit Tests - All Dependencies

on:
workflow_call:
Expand All @@ -15,7 +15,6 @@ jobs:
matrix:
os:
- "ubuntu-24.04"
- "ubuntu-22.04"
- "macos-latest"
ruby:
- "3.2"
Expand Down
20 changes: 14 additions & 6 deletions .github/workflows/pull-request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,27 @@ permissions:
jobs:
static-analysis:
uses: ./.github/workflows/_static-analysis.yml
test-units:
test-unit:
uses: ./.github/workflows/_test-units.yml
needs: static-analysis
secrets: inherit
test-integrations:
uses: ./.github/workflows/_test-integrations.yml
needs: test-units
test-unit-lite:
# Runs the unit suite with MINDEE_GEM_NAME=mindee-lite (minimal dependencies).
uses: ./.github/workflows/_test-units-lite.yml
needs: static-analysis
secrets: inherit
test-integration:
uses: ./.github/workflows/_test-integration.yml
needs: test-unit
secrets: inherit
test-integration-lite:
uses: ./.github/workflows/_test-integration-lite.yml
needs: test-unit-lite
secrets: inherit
test-smoke:
uses: ./.github/workflows/_test-smoke.yml
needs: test-units
needs: test-unit
secrets: inherit
test-cli:
uses: ./.github/workflows/_test-cli.yml
needs: test-units
needs: test-unit
secrets: inherit
10 changes: 9 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,12 @@
source 'https://rubygems.org'

# Specify your gem's dependencies in mindee.gemspec
gemspec

gemspec name: ENV.fetch('MINDEE_GEM_NAME', 'mindee')

group :development, :test do
gem 'openssl', '~> 4.0'
gem 'prism', '~> 1.3'
gem 'rake', '~> 13.3'
gem 'rspec', '~> 3.13'
end
36 changes: 21 additions & 15 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,39 @@

require 'rake'
require 'rspec/core/rake_task'
require 'yard'

is_lite_mode = ENV.fetch('MINDEE_GEM_NAME', 'mindee') == 'mindee-lite'

begin
require 'bundler/setup'
Bundler::GemHelper.install_tasks
require 'bundler/gem_helper'
Bundler::GemHelper.install_tasks(name: ENV.fetch('MINDEE_GEM_NAME', 'mindee'))
rescue LoadError
puts 'although not required, bundler is recommended for running the tests'
end

task default: :spec
exclusion_opts = is_lite_mode ? ['--tag', '~all_deps'] : []
RSpec::Core::RakeTask.new(:spec) do |t|
t.rspec_opts = exclusion_opts
end
unless is_lite_mode
require 'yard'
desc 'Generate documentation'
YARD::Rake::YardocTask.new(:doc) do |task|
task.files = ['lib/**/*.rb']
end

RSpec::Core::RakeTask.new(:spec)

desc 'Generate documentation'
YARD::Rake::YardocTask.new(:doc) do |task|
task.files = ['lib/**/*.rb']
Rake::Task[:doc].enhance do
FileUtils.cp_r(
File.join('docs', 'code_samples'),
File.join('docs', '_build')
)
end
end

desc 'Run integration tests'
RSpec::Core::RakeTask.new(:integration) do |t|
t.pattern = 'spec/**/*_integration.rb'
t.rspec_opts = ['--require', 'integration_helper']
end

Rake::Task[:doc].enhance do
FileUtils.cp_r(
File.join('docs', 'code_samples'),
File.join('docs', '_build')
)
t.rspec_opts = ['--require', 'integration_helper'] + exclusion_opts
end
2 changes: 1 addition & 1 deletion examples/auto_invoice_splitter_extraction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def invoice_splitter_auto_extraction(file_path)
mindee_client = Mindee::V1::Client.new(api_key: 'my-api-key')
input_source = mindee_client.source_from_path(file_path)

if input_source.pdf? && input_source.count_pages > 1
if input_source.pdf? && input_source.page_count > 1
parse_multi_page(mindee_client, input_source)
else
parse_single_page(mindee_client, input_source)
Expand Down
4 changes: 4 additions & 0 deletions lib/mindee.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
require 'mindee/v2'

module Mindee
# Dependency management
module Dependency
end

# Mindee internal error module.
module Error
end
Expand Down
29 changes: 29 additions & 0 deletions lib/mindee/dependency.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# frozen_string_literal: true

module Mindee
  # Centralized check for optional heavy dependencies (origami, mini_magick, pdf-reader).
  #
  # The availability probe runs once, when this file is loaded, and its result is cached so that
  # guards calling {all_deps_available?} do not re-attempt the `require` calls each time.
  module Dependency
    # Message used when PDF/image tooling is requested but the optional gems are missing.
    MINDEE_DEPENDENCIES_LOAD_ERROR = 'Attempted to load Mindee PDF/Image tools without required dependencies. ' \
                                     "If you need to process local files, please replace the 'mindee-lite' gem " \
                                     "with the standard 'mindee' gem in your Gemfile."

    # Tries to require every optional dependency.
    # @return [Boolean] `true` if all of them could be required, `false` as soon as one raises `LoadError`.
    def self.check_all_dependencies
      require 'origami'
      require 'mini_magick'
      require 'pdf-reader'
      true
    rescue LoadError
      false
    end

    # Probe once at load time; every later query reads this cached result.
    @all_deps_available = check_all_dependencies

    # Whether all optional dependencies were found when this module was loaded.
    # @return [Boolean]
    def self.all_deps_available?
      @all_deps_available
    end

    # Guard for files that cannot work without the optional dependencies.
    # @raise [LoadError] if at least one optional dependency is missing.
    # @return [nil]
    def self.require_all_deps!
      raise LoadError, MINDEE_DEPENDENCIES_LOAD_ERROR unless all_deps_available?
    end
  end
end
1 change: 1 addition & 0 deletions lib/mindee/image/image_extractor.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# frozen_string_literal: true

Mindee::Dependency.require_all_deps!
require 'mini_magick'
require 'origami'
require 'stringio'
Expand Down
24 changes: 14 additions & 10 deletions lib/mindee/input/sources/local_input_source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
require 'marcel'
require 'fileutils'

require_relative '../../pdf'
require_relative '../../image'
require_relative '../../dependency'
require_relative '../../pdf' if Mindee::Dependency.all_deps_available?
require_relative '../../image' if Mindee::Dependency.all_deps_available?

module Mindee
module Input
Expand Down Expand Up @@ -142,21 +143,16 @@ def write_to_file(path)
# Defaults to one for images.
# @return [Integer]
# @raise [NotImplementedError] if the optional PDF/image dependencies are unavailable.
def page_count
  deps_ready = Mindee::Dependency.all_deps_available?
  raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR unless deps_ready

  if pdf?
    # Rewind so the PDF is parsed from its first byte.
    @io_stream.seek(0)
    Mindee::PDF::PDFProcessor.open_pdf(@io_stream).pages.size
  else
    1
  end
end

# Returns the page count for a document.
# Defaults to one for images.
# @return [Integer]
# @deprecated Use {#page_count} instead.
def count_pages
page_count
end

# Compresses the file, according to the provided info.
# @param [Integer] quality Quality of the output file.
# @param [Integer, nil] max_width Maximum width (Ignored for PDFs).
Expand All @@ -167,6 +163,10 @@ def count_pages
# @param [bool] disable_source_text If the PDF has source text, whether to re-apply it to the original or
# not. Needs force_source_text to work.
def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
unless Mindee::Dependency.all_deps_available?
raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
end

buffer = if pdf?
Mindee::PDF::PDFCompressor.compress_pdf(
@io_stream,
Expand All @@ -189,6 +189,10 @@ def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: f
# Tells whether the file is a PDF that contains source text; `false` otherwise.
# @return [bool] `true` if the file is a PDF and has source text.
# @raise [NotImplementedError] if the optional PDF/image dependencies are unavailable.
def source_text?
  deps_ready = Mindee::Dependency.all_deps_available?
  raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR unless deps_ready

  Mindee::PDF::PDFTools.source_text?(@io_stream)
end
end
Expand Down
2 changes: 2 additions & 0 deletions lib/mindee/pdf/pdf_compressor.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# frozen_string_literal: true

Mindee::Dependency.require_all_deps!
require 'pdf-reader'

# Shorthand for pdf-reader's PDF namespace, to avoid mixups with the local Origami fork.
PDFReader = PDF

Expand Down
4 changes: 4 additions & 0 deletions lib/mindee/pdf/pdf_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ module PDF
class PDFExtractor
# @param local_input [Mindee::Input::Source::LocalInputSource]
def initialize(local_input)
unless Mindee::Dependency.all_deps_available?
raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
end

@filename = local_input.filename
if local_input.pdf?
@source_pdf = local_input.io_stream
Expand Down
1 change: 1 addition & 0 deletions lib/mindee/pdf/pdf_processor.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# frozen_string_literal: true

Mindee::Dependency.require_all_deps!
require 'origami'
require_relative 'pdf_tools'

Expand Down
Loading
Loading