1 change: 0 additions & 1 deletion perfkitbenchmarker/benchmark_spec.py
@@ -54,7 +54,6 @@
from perfkitbenchmarker import providers
from perfkitbenchmarker import relational_db
from perfkitbenchmarker import resource as resource_type
-from perfkitbenchmarker import resources  # pylint:disable=unused-import  # Load the __init__.py
from perfkitbenchmarker import smb_service
from perfkitbenchmarker import stages
from perfkitbenchmarker import static_virtual_machine as static_vm
2 changes: 0 additions & 2 deletions perfkitbenchmarker/configs/benchmark_config_spec.py
@@ -40,8 +40,6 @@
from perfkitbenchmarker.configs import vm_group_decoders
from perfkitbenchmarker.resources import example_resource_spec
from perfkitbenchmarker.resources import jobs_setter
-# Included to import & load Kubernetes' __init__.py somewhere.
-from perfkitbenchmarker.resources import kubernetes  # pylint:disable=unused-import
from perfkitbenchmarker.resources import managed_ai_model_spec
from perfkitbenchmarker.resources.pinecone import pinecone_resource_spec
from perfkitbenchmarker.resources.vertex_vector_search import vvs_resource_spec
89 changes: 71 additions & 18 deletions perfkitbenchmarker/configs/default_benchmark_config.yaml
@@ -390,24 +390,6 @@ postgres_sysbench_tpcc:
sysbench_scale: 100
sysbench_use_fk: False

-mongodb:
-  name: mongodb_ycsb
-  flags:
-    mongodb_readahead_kb: 8
-    iostat: True
-    sar: True
-    sar_interval: 1
-    ycsb_fail_on_incomplete_loading: True
-    ycsb_measurement_type: hdrhistogram
-    ycsb_status: True
-    ycsb_status_interval_sec: 1
-    ycsb_operation_count: 1000000000
-    ycsb_record_command_line: False
-    ycsb_run_parameters: dataintegrity=true,readallfields=true,writeallfields=true
-    timeout_minutes: 360
-    mongodb_primary_only: True
-    mongodb_pss: False

fio_latency:
name: fio
flags:
@@ -461,3 +443,74 @@ hadoop_dfsio:
disk_size: 6830
disk_type: hyperdisk-throughput
provisioned_throughput: 600

+diskspd_read_only:
+  name: diskspd
+  flags: &diskspd_read_only_flags
+    diskspd_prefill_duration: 2700
+    diskspd_write_read_ratio: 0
+
+diskspd_write_only:
+  name: diskspd
+  flags: &diskspd_write_only_flags
+    diskspd_write_read_ratio: 100
+
+diskspd_single_thread_latency:
+  name: diskspd
+  flags: &diskspd_single_thread_latency_flags
+    diskspd_thread_number_per_file: 1
+    diskspd_outstanding_io: 1
+    diskspd_block_size: 4k
+    diskspd_access_pattern: r
+
+diskspd_max_iops:
+  name: diskspd
+  flags: &diskspd_max_iops_flags
+    diskspd_thread_number_per_file: 1,4,8,16
+    diskspd_outstanding_io: 128
+    diskspd_block_size: 4k
+    diskspd_access_pattern: r
+
+diskspd_max_throughput:
+  name: diskspd
+  flags: &diskspd_max_throughput_flags
+    diskspd_thread_number_per_file: 8,16
+    diskspd_outstanding_io: 64,128
+    diskspd_block_size: 1M
+    diskspd_access_pattern: s
+
+diskspd_single_thread_latency_read_only:
+  name: diskspd
+  flags:
+    <<: *diskspd_read_only_flags
+    <<: *diskspd_single_thread_latency_flags
+
+diskspd_single_thread_latency_write_only:
+  name: diskspd
+  flags:
+    <<: *diskspd_write_only_flags
+    <<: *diskspd_single_thread_latency_flags
+
+diskspd_max_iops_read_only:
+  name: diskspd
+  flags:
+    <<: *diskspd_read_only_flags
+    <<: *diskspd_max_iops_flags
+
+diskspd_max_iops_write_only:
+  name: diskspd
+  flags:
+    <<: *diskspd_write_only_flags
+    <<: *diskspd_max_iops_flags
+
+diskspd_max_throughput_read_only:
+  name: diskspd
+  flags:
+    <<: *diskspd_read_only_flags
+    <<: *diskspd_max_throughput_flags
+
+diskspd_max_throughput_write_only:
+  name: diskspd
+  flags:
+    <<: *diskspd_write_only_flags
+    <<: *diskspd_max_throughput_flags
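
Note: the combined entries above compose the base profiles through YAML merge keys. Repeating << inside a single mapping is nonstandard YAML 1.1, but PyYAML, which PKB uses to load these configs, applies each merge in turn, with explicit keys overriding merged ones. A minimal sketch (simplified keys) of how the anchors flatten:

import yaml  # PyYAML; its loader tolerates the repeated merge key.

snippet = """
read_only: &read_only_flags
  diskspd_write_read_ratio: 0
max_iops: &max_iops_flags
  diskspd_outstanding_io: 128
  diskspd_block_size: 4k
max_iops_read_only:
  <<: *read_only_flags
  <<: *max_iops_flags
"""

print(yaml.safe_load(snippet)['max_iops_read_only'])
# {'diskspd_write_read_ratio': 0, 'diskspd_outstanding_io': 128, 'diskspd_block_size': '4k'}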
4 changes: 2 additions & 2 deletions perfkitbenchmarker/data/docker/flink/cloudbuild.yaml.j2
@@ -1,6 +1,6 @@
steps:
-- name: gcr.io/cloud-builders/gsutil
-  args: ['cp', {{ dpb_job_jarfile }}, 'job.jar']
+- name: gcr.io/cloud-builders/gcloud
+  args: ['storage', 'cp', {{ dpb_job_jarfile }}, 'job.jar']
- name: 'gcr.io/cloud-builders/docker'
args: ['build',
'--build-arg',
4 changes: 4 additions & 0 deletions perfkitbenchmarker/data/spark/spark-defaults.conf.j2
@@ -17,6 +17,10 @@ spark.master=spark://{{ leader_ip }}:7077
# Hadoop configs prefixed with spark.hadoop
spark.hadoop.fs.defaultFS=hdfs://{{ leader_ip }}/

{% if shuffle_partitions is not none %}
spark.sql.shuffle.partitions={{ shuffle_partitions }}
{% endif %}

{% for key, value in spark_conf.items() %}
{{ key }}={{ value }}
{% endfor %}
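
Note: the new conditional emits spark.sql.shuffle.partitions only when the benchmark supplies a value. A minimal rendering sketch, assuming Jinja2 (the engine behind these .j2 templates):

import jinja2

template = jinja2.Template(
    '{% if shuffle_partitions is not none %}'
    'spark.sql.shuffle.partitions={{ shuffle_partitions }}'
    '{% endif %}'
)
print(template.render(shuffle_partitions=200))  # spark.sql.shuffle.partitions=200
print(template.render(shuffle_partitions=None))  # prints an empty line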
3 changes: 3 additions & 0 deletions perfkitbenchmarker/edw_service.py
@@ -180,6 +180,9 @@

FLAGS = flags.FLAGS

EDW_PYTHON_DRIVER_LIB_FILE = 'edw_python_driver_lib.py'
EDW_PYTHON_DRIVER_LIB_DIR = 'edw/common/clients/python'

TYPE_2_PROVIDER = dict([
('athena', 'aws'),
('redshift', 'aws'),
21 changes: 21 additions & 0 deletions perfkitbenchmarker/linux_benchmarks/dpb_sparksql_benchmark.py
@@ -45,6 +45,7 @@
"""

from collections.abc import MutableMapping
import functools
import json
import logging
import os
@@ -53,11 +54,13 @@
from typing import Any, List

from absl import flags
from perfkitbenchmarker import background_tasks
from perfkitbenchmarker import configs
from perfkitbenchmarker import dpb_constants
from perfkitbenchmarker import dpb_service
from perfkitbenchmarker import dpb_sparksql_benchmark_helper
from perfkitbenchmarker import errors
from perfkitbenchmarker import linux_virtual_machine
from perfkitbenchmarker import object_storage_service
from perfkitbenchmarker import sample
from perfkitbenchmarker import temp_dir
@@ -123,6 +126,10 @@
' DPB services.',
)

_READAHEAD_KB = flags.DEFINE_integer(
'sparksql_readahead_kb', None, 'Configure block device readahead settings.'
)

FLAGS = flags.FLAGS

LOG_RESULTS_PATTERN = (
@@ -214,6 +221,14 @@ def CheckPrerequisites(benchmark_config):
)


def _PrepareNode(vm: linux_virtual_machine.BaseLinuxVirtualMachine) -> None:
if _READAHEAD_KB.value is not None:
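    # Readahead is set in 512-byte sectors (blockdev --setra units), hence KB * 2.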
vm.SetReadAhead(
_READAHEAD_KB.value * 2,
[d.GetDevicePath() for d in vm.scratch_disks],
)


def Prepare(benchmark_spec):
"""Installs and sets up dataset on the Spark clusters.

@@ -223,6 +238,12 @@ def Prepare(benchmark_spec):
Args:
benchmark_spec: The benchmark specification
"""
# Only unmanaged dpb services are VM-aware
if benchmark_spec.dpb_service.CLOUD == 'Unmanaged':
nodes = benchmark_spec.dpb_service.vms['worker_group']
partials = [functools.partial(_PrepareNode, node) for node in nodes]
background_tasks.RunThreaded((lambda f: f()), partials)

cluster = benchmark_spec.dpb_service
storage_service = cluster.storage_service

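Note: Prepare above binds each worker into a zero-argument partial and fans them out through background_tasks.RunThreaded. The same shape with the stdlib, for illustration only (node names hypothetical):

import functools
from concurrent.futures import ThreadPoolExecutor

def _prepare_node(node: str) -> None:
  # Stand-in for _PrepareNode(vm) above.
  print(f'tuning readahead on {node}')

nodes = ['worker-0', 'worker-1', 'worker-2']
partials = [functools.partial(_prepare_node, node) for node in nodes]
with ThreadPoolExecutor() as pool:
  list(pool.map(lambda f: f(), partials))
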
67 changes: 62 additions & 5 deletions perfkitbenchmarker/linux_benchmarks/mongodb_ycsb_benchmark.py
@@ -40,11 +40,11 @@
from perfkitbenchmarker.linux_packages import ycsb

flags.DEFINE_integer(
-    'mongodb_readahead_kb', None, 'Configure block device readahead settings.'
+    'mongodb_readahead_kb', 8, 'Configure block device readahead settings.'
)
flags.DEFINE_bool(
'mongodb_primary_only',
-    False,
+    True,
'Run with a simple primary-only setup. Mutually exclusive with'
' --mongodb_pss. If both are False, the default PSA setup will be used.',
)
@@ -78,7 +78,19 @@
description: Run YCSB against MongoDB.
vm_groups:
primary:
-      vm_spec: *default_dual_core
+      vm_spec:
+        GCP:
+          machine_type: n4-standard-2
+          zone: us-central1-b
+          boot_disk_size: 100
+        Azure:
+          machine_type: Standard_D2s_v6
+          zone: eastus-1
+          boot_disk_size: 100
+        AWS:
+          machine_type: m7i.large
+          zone: us-east-1a
+          boot_disk_size: 100
disk_spec:
GCP:
disk_size: 500
@@ -94,7 +106,19 @@
mount_point: /scratch
vm_count: 1
secondary:
-      vm_spec: *default_dual_core
+      vm_spec:
+        GCP:
+          machine_type: n4-standard-2
+          zone: us-central1-b
+          boot_disk_size: 100
+        Azure:
+          machine_type: Standard_D2s_v6
+          zone: eastus-1
+          boot_disk_size: 100
+        AWS:
+          machine_type: m7i.large
+          zone: us-east-1a
+          boot_disk_size: 100
disk_spec:
GCP:
disk_size: 500
@@ -110,7 +134,19 @@
mount_point: /scratch
vm_count: 1
secondary_2:
-      vm_spec: *default_dual_core
+      vm_spec:
+        GCP:
+          machine_type: n4-standard-2
+          zone: us-central1-b
+          boot_disk_size: 100
+        Azure:
+          machine_type: Standard_D2s_v6
+          zone: eastus-1
+          boot_disk_size: 100
+        AWS:
+          machine_type: m7i.large
+          zone: us-east-1a
+          boot_disk_size: 100
disk_spec:
GCP:
disk_size: 500
@@ -138,6 +174,27 @@
fstab_options: noatime
enable_transparent_hugepages: false
create_and_boot_post_task_delay: 5
+    ycsb_fail_on_incomplete_loading: True
+    ycsb_measurement_type: hdrhistogram
+    ycsb_status: True
+    ycsb_status_interval_sec: 1
+    ycsb_operation_count: 1000000000
+    ycsb_record_command_line: False
+    ycsb_run_parameters: dataintegrity=true,readallfields=true,writeallfields=true
+    ycsb_client_vms: 1
+    ycsb_preload_threads: 512
+    ycsb_threads_per_client: 2048
+    ycsb_workload_files: workloadac,workloada,workloadx
+    ycsb_field_count: 10
+    ycsb_field_length: 100
+    ycsb_record_count: 20000000
+    ycsb_sleep_after_load_in_sec: 300
+    ycsb_timelimit: 300
+    ycsb_sleep_between_thread_runs_sec: 120
+    os_type: ubuntu2204
+    timeout_minutes: 360
+    sar: True
+    sar_interval: 1
"""

_LinuxVM = linux_virtual_machine.BaseLinuxVirtualMachine
@@ -20,6 +20,7 @@

import copy
import logging
+import re
import time

from absl import flags
Expand Down Expand Up @@ -105,10 +106,27 @@
_OLTP_WRITE_ONLY = 'oltp_write_only'
_OLTP = [_OLTP_READ_WRITE, _OLTP_READ_ONLY, _OLTP_WRITE_ONLY]

-_SHARED_BUFFER_SIZE = flags.DEFINE_integer(
+_SHARED_BUFFER_SIZE = flags.DEFINE_string(
    'postgresql_shared_buffer_size',
-    10,
-    'Size of the shared buffer in the postgresql cluster (in Gb).',
+    '10G',
+    'Size of the shared buffer in the postgresql cluster. '
+    'Format: <size>[<unit>], where <unit> is one of (B, K, M, G). '
+    'Example: 16G, 512M. If no unit is specified, G is assumed by default.',
)


+def _ValidateSharedBufferSizeFlagValue(value: str) -> bool:
+  """Validates the shared buffer size flag's format."""
+  # Checks for one or more digits, optionally followed by B, K, M, or G.
+  return bool(re.fullmatch(r'^\d+[BKMG]?$', value))
+
+
+flags.register_validator(
+    _SHARED_BUFFER_SIZE,
+    _ValidateSharedBufferSizeFlagValue,
+    message=(
+        '--postgresql_shared_buffer_size must be in the format <size>[<unit>] '
+        'where <unit> is one of (B, K, M, G). Example: 16G, 512M, 1024K, 2048B.'
+    ),
+)
_MEASURE_MAX_QPS = flags.DEFINE_bool(
'postgresql_measure_max_qps',
@@ -123,6 +141,16 @@
)


+def GetBufferSize() -> str:
+  """Returns the shared buffer size, appending the implicit G unit if absent."""
+  buffer_size = _SHARED_BUFFER_SIZE.value
+  if buffer_size.endswith(('B', 'K', 'M', 'G')):
+    return buffer_size
+  return f'{buffer_size}G'
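
Note: a standalone sketch of the flag semantics above, with both helpers re-declared locally so it runs outside PKB:

import re

def validate(value: str) -> bool:
  # Mirrors _ValidateSharedBufferSizeFlagValue: digits plus an optional unit.
  return bool(re.fullmatch(r'\d+[BKMG]?', value))

def buffer_size(value: str) -> str:
  # Mirrors GetBufferSize: bare numbers get the implicit G suffix.
  return value if value.endswith(('B', 'K', 'M', 'G')) else f'{value}G'

assert validate('10') and buffer_size('10') == '10G'
assert validate('512M') and buffer_size('512M') == '512M'
assert not validate('16GB')  # two-letter units are rejected
assert not validate('1.5G')  # decimals are rejected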


def GetConfig(user_config):
"""Get the benchmark config, applying user overrides.

@@ -185,7 +213,7 @@ def Prepare(benchmark_spec: bm_spec.BenchmarkSpec):
postgresql.ConfigureAndRestart(
primary_server,
FLAGS.run_uri,
-      _SHARED_BUFFER_SIZE.value,
+      GetBufferSize(),
_CONF_TEMPLATE_PATH.value,
)
for index, replica in enumerate(replica_servers):
@@ -194,7 +222,7 @@
replica,
index,
FLAGS.run_uri,
-        _SHARED_BUFFER_SIZE.value,
+        GetBufferSize(),
_CONF_TEMPLATE_PATH.value,
)
clients = benchmark_spec.vm_groups['client']