Skip to content

Python SDK generates requests to BigQuery API's method Tabledata:list with invalid argument alt #19246

@kennknowles

Description

@kennknowles

When streaming inserts to BigQuery are involved, the check in io.gcp.bigquery.start_bundle for whether the table is empty generates requests to the Tabledata:list method of the BigQuery API. The problem is that the request includes the parameter alt=json, which does not seem to be accepted by the API.

Original log:

 java.util.concurrent.ExecutionException: java.lang.RuntimeException: Error received from SDK harness for instruction -4955710: Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 131, in _execute
    response = task()
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 166, in <lambda>
    self._execute(lambda: worker.do_instruction(work), work)
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 212, in do_instruction
    request.instruction_id)
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 234, in process_bundle
    processor.process_bundle(instruction_id)
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/bundle_processor.py", line 404, in process_bundle
    op.start()
  File "apache_beam/runners/worker/operations.py", line 368, in apache_beam.runners.worker.operations.DoOperation.start
    def start(self):
  File "apache_beam/runners/worker/operations.py", line 369, in apache_beam.runners.worker.operations.DoOperation.start
    with self.scoped_start_state:
  File "apache_beam/runners/worker/operations.py", line 420, in apache_beam.runners.worker.operations.DoOperation.start
    self.dofn_runner.start()
  File "apache_beam/runners/common.py", line 695, in apache_beam.runners.common.DoFnRunner.start
    self._invoke_bundle_method(self.do_fn_invoker.invoke_start_bundle)
  File "apache_beam/runners/common.py", line 692, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
    self._reraise_augmented(exn)
  File "apache_beam/runners/common.py", line 702, in apache_beam.runners.common.DoFnRunner._reraise_augmented
    raise
  File "apache_beam/runners/common.py", line 690, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
    bundle_method()
  File "apache_beam/runners/common.py", line 348, in apache_beam.runners.common.DoFnInvoker.invoke_start_bundle
    def invoke_start_bundle(self):
  File "apache_beam/runners/common.py", line 352, in apache_beam.runners.common.DoFnInvoker.invoke_start_bundle
    self.signature.start_bundle_method.method_value())
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcp/bigquery.py", line 1359, in start_bundle
    self.create_disposition, self.write_disposition)
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/utils/retry.py", line 184, in wrapper
    return fun(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcp/bigquery.py", line 1100, in get_or_create_table
    table_empty = self._is_table_empty(project_id, dataset_id, table_id)
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/utils/retry.py", line 184, in wrapper
    return fun(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcp/bigquery.py", line 972, in _is_table_empty
    response = self.client.tabledata.List(request)
  File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py", line 500, in List
    config, request, global_params=global_params)
  File "/usr/local/lib/python2.7/dist-packages/apitools/base/py/base_api.py", line 722, in _RunMethod
    return self.ProcessHttpResponse(method_config, http_response, request)
  File "/usr/local/lib/python2.7/dist-packages/apitools/base/py/base_api.py", line 728, in ProcessHttpResponse
    self.__ProcessHttpResponse(method_config, http_response, request))
  File "/usr/local/lib/python2.7/dist-packages/apitools/base/py/base_api.py", line 599, in __ProcessHttpResponse
    http_response, method_config=method_config, request=request)
HttpBadRequestError: HttpError accessing <https://www.googleapis.com/bigquery/v2/projects/xxxx/datasets/xxxx/tables/xxxx/data?alt=json&maxResults=1>: response: <{'status': '400', 'content-length': '245', 'x-xss-protection': '1; mode=block', 'transfer-encoding': 'chunked', 'vary': 'Origin, X-Origin, Referer', 'server': 'ESF', '-content-encoding': 'gzip', 'cache-control': 'private', 'date': 'Sun, 09 Dec 2018 06:16:01 GMT', 'x-frame-options': 'SAMEORIGIN', 'content-type': 'application/json; charset=UTF-8'}>, content <{
  "error": {
    "code": 400,
    "message": "Request contains an invalid argument.",
    "errors": [
      {
        "message": "",
        "domain": "global",
        "reason": "invalid"
      }
    ],
    "status": "INVALID_ARGUMENT"
  }
}
>

Imported from Jira BEAM-6198. Original Jira may contain additional context.
Reported by: tpilewicz.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions