Completed DynamoDB + DAX Benchmarker with a nice TUI to boot

This commit is contained in:
hamilcarBarca17
2023-08-02 18:11:41 -06:00
parent 09862c1b43
commit e42070eefa
55 changed files with 3574 additions and 1 deletions
+18
View File
@@ -0,0 +1,18 @@
/target
/.idea/
Cargo.lock
/.scannerwork/
*.exe
*.exe~
*.dll
*.so
*.dylib
*.test
*.out
.idea
out/
main
dynamodb-benchmarker
dax-benchmarker
*.log
*.json
+6
View File
@@ -0,0 +1,6 @@
run:
skip-dirs:
- cdk
- src
- scripts
- target
+25
View File
@@ -0,0 +1,25 @@
# Cargo manifest for the Rust DynamoDB benchmarker CLI.
[package]
name = "dynamodb-benchmarker"
version = "0.1.0"
authors = ["Alex Clarke <alex.j.tusa@gmail.com>"]
description = "A CLI tool for simulating heavy usage against DynamoDB and publishing metrics to an Elastic Stack for analysis"
readme = "README.md"
edition = "2021"
[dependencies]
# Ergonomic ad-hoc error handling for application code.
anyhow = "1.0.71"
# AWS credential/region resolution, the DynamoDB service client, and shared AWS types.
aws-config = "0.55.3"
aws-sdk-dynamodb = "0.28.0"
aws-types = "0.55.3"
# Date/time handling; `serde` feature enables (de)serializing timestamps.
chrono = { version = "0.4.26", features = ["serde"] }
# CLI argument parsing via #[derive(Parser)].
clap = { version = "4.3.14", features = ["derive"] }
# Official Elasticsearch client; NOTE(review): pinned to a pre-release (alpha) version.
elasticsearch = "8.5.0-alpha.1"
# Lorem-ipsum text generation — presumably for random item payloads; confirm in src.
lipsum = "0.9.0"
# Logging facade plus the log4rs backend (console appender enabled).
log = "0.4.19"
log4rs = { version = "1.2.0", features = ["console_appender"] }
# Random number generation (e.g. randomized operation selection).
rand = "0.8.5"
# Serialization framework with derive macros; arbitrary_precision preserves
# numeric fidelity in JSON documents.
serde = { version = "1.0.171", features = ["derive"] }
serde_json = { version = "1.0.102", features = ["arbitrary_precision"] }
# Async runtime (full feature set) and companion utilities.
tokio = { version = "1.29.1", features = ["full"] }
tokio-util = "0.7.8"
# v4 (random) UUIDs; `fast-rng` speeds up generation.
uuid = { version = "1.4.0", features = ["v4", "fast-rng"] }
+36
View File
@@ -0,0 +1,36 @@
#!make
# Build/run automation for the DynamoDB (Rust) and DAX (Go) benchmarkers,
# plus lifecycle management of the local docker-elk Elastic Stack.
default: build

.PHONY: init start-elastic-stack stop-elastic-stack build build-dynamodb-benchmarker build-dax-benchmarker run-dynamodb-benchmarker run-dax-benchmarker clean lint

# Clone docker-elk next to this repo (if it is not already there) and run its
# one-shot `setup` compose service to initialize the Elastic Stack.
# Notes: recipes run under /bin/sh, so POSIX `[` is used rather than bash's
# `[[`; the clone destination must be the explicit ../docker-elk directory —
# cloning into `..` itself fails because the parent directory is not empty.
init: build
	@[ -d ../docker-elk ] || git clone https://github.com/deviantony/docker-elk.git ../docker-elk
	@cd ../docker-elk && docker compose up setup
	@echo "Default login creds: username=elastic, password=changeme"

start-elastic-stack:
	@cd ../docker-elk && docker compose up -d

stop-elastic-stack:
	@cd ../docker-elk && docker compose down

# Rebuild the Rust benchmarker from scratch and drop the binary in the repo root.
build-dynamodb-benchmarker:
	@cargo clean && rm -f dynamodb-benchmarker && cargo build --release && mv ./target/release/dynamodb-benchmarker .

# Rebuild the Go benchmarker binary in the repo root.
build-dax-benchmarker:
	@rm -f main && rm -f dax-benchmarker && go build -o dax-benchmarker pkg/app/main.go

build: build-dynamodb-benchmarker build-dax-benchmarker

run-dynamodb-benchmarker:
	@cargo run

run-dax-benchmarker:
	@go run pkg/app/main.go

# Remove build artifacts, generated binaries, and CDK output.
clean:
	@cargo clean && rm -f main && rm -f dynamodb-benchmarker && rm -f dax-benchmarker && rm -rf cdk/cdk.out && rm -rf cdk/node_modules

lint:
	@cargo clippy && golangci-lint run
+279 -1
View File
@@ -1 +1,279 @@
# DynamoDB + DAX Benchmarker
This project houses the Rust and Go code to benchmark the performance of DynamoDB and DAX by simulating heavy loads.
![main_menu](./screenshots/dynamodb-dax-benchmarker.png)
![advanced_mode](./screenshots/advanced-mode.png)
![ansible_playbook_tail](./screenshots/ansible-playbook-tail.png)
## Features
* [x] Simulate reads on existing data
* [x] Simulate writes
* [x] Simulate updates
* [x] Simulate deletes
* [x] Record the following metrics
* [x] The type of operation being simulated
* [x] Total simulation time
* [x] Read times
* [x] Write times
* [x] Confirmation of write times (i.e. how long after a write is the item available when performing a read)
* [x] Update times
* [x] Confirmation of update times (i.e. how long after an update is the item available when performing a read)
* [x] Delete times
* [x] Confirmation of delete times (i.e. how long after a delete is the item no longer available when performing a read)
* [x] Randomized selection of which operation to perform
* [x] Multithreaded performance for publishing to a locally running Elasticsearch cluster
* [x] Highly performant concurrent operations against DynamoDB - 1,000 concurrent operations
* [x] Read-only scenarios for tables that are likely to be hit with mostly reads and very few mutating operations
* [x] Randomly generate schemas for DynamoDB with a specified number of attributes and generate random data to query
**Disclaimer:** This project exists as a proof-of-concept for how to benchmark and evaluate the performance of DynamoDB + DAX. As such,
this project does not contain any unit tests, integration tests, or E2E tests, thus regressions with future updates are possible and
project stability is _not_ guaranteed in perpetuity.
## Warning!
When making changes to this repository, take extra care to be sure you don't commit the automatically generated variables in
the [hosts file](./ansible/inventories/local/hosts.yml) and in the [host_vars](./ansible/inventories/local/host_vars/localhost.yml).
These files have variables that are populated automatically to make your life easier instead of having to specify variables
all the time. You can remove them manually, or you can wipe away everything and have them removed for you:
* `bastion.hosts.BASTION_HOST_PUBLIC_IP:`
* `vpc_id`
* `dax_endpoint`
## Getting Started
The easiest way to use this project is to use the [benchmarker.sh](./benchmarker.sh) script's TUI. This will automate everything for you and make
the use of this project as painless as possible! Just ensure it's executable and run the TUI:
```shell
chmod +x benchmarker.sh
./benchmarker.sh
```
This project is broken into several distinct pieces. For more information on each of the specific pieces, refer to their respective READMEs:
* [Ansible](./ansible/README.md) -- Go here if you're looking to have more control over what's going on across the entire deployment process, local and AWS
* [CDK](./cdk/README.md) -- Go here if you're looking to discover and tweak the AWS stack and resources
The vast majority of this project is designed to be automated; however, it is also designed to allow the user to customize it as needed. Naturally, customization
is a more challenging course of action here as you'll need to do some of the automated steps manually. I try to detail those steps below.
### Prerequisites
* The commands are being run in a Debian-based Linux environment (i.e. Ubuntu, WSL, etc.)
* [AWS CLI v2](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) is installed and configured
* Docker is installed (`sudo apt-get update && sudo apt-get install docker-ce docker-ce-cli`)
* The docker compose plugin is installed (`sudo apt-get update && sudo apt-get install docker-compose-plugin`)
* Rust is installed via rustup (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`)
* `jq` is installed (`sudo apt-get install jq`)
* Go is installed ([instructions here](https://go.dev/doc/install))
### Setting up your own local Elastic Stack
Fortunately, setting up the Elastic Stack locally is super easy thanks to [this repository](https://github.com/deviantony/docker-elk).
This setup is automated for you via the `Makefile` target: `init`
It will
* Clone the repository to the folder above the current directory
* Go into the folder
* Start the `setup` docker compose to initialize the Elastic Stack. Don't worry. This function is supposed to exit once it's done initializing your local Elastic Stack
### Starting and Stopping the Elastic Stack
To start the Elastic Stack, you can either manually `cd` into the [docker-elk](../docker-elk) folder and run `docker compose up -d`, or you can use the `start-elastic-stack` target in the [`Makefile`](./Makefile).
Similarly, you can stop the Elastic Stack by either `cd`ing into the [docker-elk](../docker-elk) folder and running `docker compose down`, or you can use the `stop-elastic-stack` target in the [`Makefile`](./Makefile).
### Running the Benchmarkers
To run the benchmarker, make sure you've done all the following steps so that the AWS SDK can pick up your credentials:
* You're logged into your desired AWS account via the AWS CLI
* Ensure you're properly connected with `aws sts get-caller-identity`
* You've exported all the following AWS environment variables, so they can be picked up by the AWS SDK at runtime to authenticate with AWS:
* `AWS_ACCESS_KEY_ID`
* `AWS_SECRET_ACCESS_KEY`
* `AWS_SESSION_TOKEN`
* `AWS_REGION` (not typically defined by the CLI)
* `AWS_ACCOUNT` (this is a special variable that is not normally defined by the AWS CLI: definition is achieved by running `export AWS_ACCOUNT=$(aws sts get-caller-identity | jq -r .Account)`)
* A nifty little shortcut for exporting all but the `AWS_REGION` and `AWS_ACCOUNT` variables is provided with the following command: ```shell eval $(aws configure export-credentials --format env)```
There's a few ways to run the benchmarkers, but the easiest is to build them and run their binaries.
It's as simple as `make build` and running whichever binary that corresponds to the benchmarking you wish to perform; e.g.
* `./dynamodb-benchmarker`
or
* `./dax-benchmarker`
For both the `dynamodb-benchmarker` and the `dax-benchmarker`, additional help and usage flags can be found using `--help`. This way you can tweak your benchmarking experience as necessary.
**dynamodb-benchmarker help**:
```
atusa@atusa-thinkpad:~/code/dynamodb-benchmarker$ ./dynamodb-benchmarker --help
A CLI tool for simulating heavy usage against DynamoDB and publishing metrics to an Elastic Stack for analysis
Usage: dynamodb-benchmarker [OPTIONS]
Options:
-c, --concurrent-simulations <CONCURRENT_SIMULATIONS>
The number of concurrent simulations to run [default: 1000]
-a, --attributes <ATTRIBUTES>
The number of attributes to use when populating and querying the DynamoDB table; minimum value of 1 [default: 5]
-d, --duration <DURATION>
The length of time (in seconds) to run the benchmark for [default: 1800]
-b, --buffer <BUFFER>
The buffer size of the Elasticsearch thread's MPSC channel [default: 500]
-u, --username <USERNAME>
Local Elasticsearch cluster username [default: elastic]
-p, --password <PASSWORD>
Local Elasticsearch cluster password [default: changeme]
-i, --index <INDEX>
The Elasticsearch Index to insert data into [default: dynamodb]
-t, --table-name <TABLE_NAME>
The DynamoDB table to perform operations against [default: atusa-high-velocity-table]
-r, --read-only
Whether to run a read-only scenario for benchmarking
-h, --help
Print help
-V, --version
Print version
```
**dax-benchmarker help**:
```
atusa@atusa-thinkpad:~/code/dynamodb-benchmarker$ ./dax-benchmarker --help
A CLI tool for simulating heavy usage against DAX and publishing metrics to an Elastic Stack for analysis
Usage:
dax-benchmarker [flags]
Flags:
-a, --attributes int The number of attributes to use when populating and querying the DynamoDB table; minimum value of 1 (default 5)
-b, --buffer int The buffer size of the Elasticsearch goroutine's channel (default 500)
-c, --concurrent-simulations int The number of concurrent simulations to run (default 1000)
  -d, --duration int                 The length of time (in seconds) to run the benchmark for (default 1800)
-e, --endpoint string The DAX endpoint to hit when running simulations (assumes secure endpoint, so do not specify port)
-h, --help help for dax-benchmarker
-i, --index string The Elasticsearch Index to insert data into (default "dax")
-p, --password string Local Elasticsearch cluster password (default "changeme")
-r, --read-only Whether to run a read-only scenario for benchmarking
-t, --table string The DynamoDB table to perform operations against (default "atusa-high-velocity-table")
-u, --username string Local Elasticsearch cluster username (default "elastic")
```
#### DAX Benchmarker Gotcha
The nature of DAX is that it does not allow external access from outside the VPC it is deployed in. This is due to the
fact that DAX uses a proprietary protocol that does not support TLS. So, in order to run the DAX benchmarker, we must
push it out and run it from the bastion host that is created in the CDK. This is why the host is created.
To do this manually, you need to know a few pieces of information:
* The SSH key used to connect to the host -- if you did not specify a key manually, then the key is `~/.ssh/$USER-dax-pair.pem`
* The bastion host's public IP address
* The DAX endpoint URI
The bastion host's public IP and the DAX Endpoint can be obtained using the following commands
(You'll need to copy/paste the DAX endpoint once you've SSH'd into the bastion host):
```shell
dax_endpoint=$(aws cloudformation describe-stacks --stack-name "$USER-dax-benchmark-stack" --query "Stacks[0].Outputs[?OutputKey=='DaxEndpoint'].OutputValue" --output text)
bastion_host_ip=$(aws cloudformation describe-stacks --stack-name "$USER-dax-benchmark-stack" --query "Stacks[0].Outputs[?OutputKey=='InstancePublicIp'].OutputValue" --output text)
```
Then, you need to upload the `dax-benchmarker` binary to the bastion host:
```shell
scp -i ~/.ssh/"$USER"-dax-pair.pem dax-benchmarker ec2-user@"$bastion_host_ip":/home/ec2-user/
```
Additionally, you'll need to configure the bastion host to use your current AWS CLI creds; so you'll need to run the following
command locally and paste the output exactly into the SSH session:
```shell
aws configure export-credentials --format env
```
Finally, you need to SSH into the bastion host with remote port forwarding to your local Elasticsearch cluster (port 9200):
```shell
ssh -i ~/.ssh/"$USER"-dax-pair.pem -R 9200:localhost:9200 ec2-user@"$bastion_host_ip"
```
Once you're SSH'd into the bastion host, you'll need to set the `DAX_ENDPOINT` environment variable using the DAX endpoint spit out
by the previous command:
```shell
export DAX_ENDPOINT='PASTE_DAX_ENDPOINT_HERE'
```
Be sure to paste the output of the `aws configure export-credentials --format env` command as well
Finally, be sure to also export the `AWS_REGION` environment variable that matches the region you deployed your stack into.
Once you've done all of this, you're ready to run the `dax-benchmarker` from the bastion host and customize the experience however you need using the
configuration parameters provided (`./dax-benchmarker -h`).
### Scenarios
By default, for both benchmarkers, they perform CRUD simulations that randomly choose to
* Read an existing item
* Write a new item and record how long it takes to confirm it's there (deletes the item afterward)
* Create a new item and update it, then record how long it takes to confirm the update is reflected in subsequent API calls (deletes the item afterward)
However, sometimes a more realistic test is to simply run in `read-only` mode; This is supported by both benchmarkers via the `-r, --read-only` flag.
In `read-only` mode, each concurrent simulation randomly selects a delay between 0 and 15 seconds and then executes a read on an existing item. This simulates more realistic behavior from applications
that are only reading from DAX or DynamoDB and not performing any write, update, or delete operations.
## Accessing the Elastic Stack and analyzing data
By default, the Elastic Stack services are at the following URLs when running locally:
* Elasticsearch -> `http://localhost:9200`
* Kibana -> `http://localhost:5601`
The default credentials for accessing them are
* Username -> `elastic`
* Password -> `changeme`
Once you're in, you can use Kibana to analyze the data published by the benchmarker.
This data lives in the `dynamodb` and the `dax` indices of Elasticsearch by default, unless specified otherwise on the clients by the user.
**Note:** Sometimes the simulations would reach the provisioned throughput thresholds and would be rate-limited by AWS. I set the DynamoDB table to On-Demand
to scale out automatically, however this does not always prevent being rate limited. So that is why I also track the Failed Simulations in the Kibana graphs.
## Populating the DynamoDB benchmarking table with random data
By default, the clients and CDK create a DynamoDB table titled `$USER-high-velocity-table`. To run the clients with a different table name, use the `-t, --table` arguments.
If you wish to populate the table with some data, the easiest way to achieve this is via the [randomly-generate-high-velocity-data](./scripts/randomly-generate-high-velocity-data.sh) script.
Simply run it and specify the number of items you wish to populate (rounded to a multiple of 25) via `-i 50`!
To follow the progress of the script, tail the `/tmp/benchmarker.log` file.
You can specify different arguments to the script to tweak the settings for the script as necessary:
```
atusa@atusa-thinkpad:~/code/dynamodb-benchmarker$ ./scripts/randomly-generate-high-velocity-data.sh --help
randomly-generate-high-velocity-data: A script to randomly generate high-velocity data for some DynamoDB table with random attributes and values for benchmarking purposes.
USAGE:
randomly-generate-high-velocity-data [OPTIONS] [ARGS]...
-h, --help Show this usage screen
ARGS:
-a, --attributes <ATTRIBUTES> The number of attributes to populate each item in the table with
This defaults to 5
-i, --items <ITEMS> The number of items to populate the table with
Items are populated 25 at a time, so whatever number you provide will be rounded to the nearest multiple of 25
-t, --table <TABLE_NAME> The name of the DynamoDB table to populate
This defaults to atusa-high-velocity-table
```
These arguments are provided as a convenience to the user if they so wish to populate a table other than the default one created by the CDK.
## Troubleshooting
In the event you need more information about any of the automation, you can check the various log files created throughout the application:
* `/tmp/ansible-playbook-output.log` -- Generated whenever ansible-playbooks are run from the TUI
* `/tmp/benchmarker.log` -- Generated whenever you run the `randomly-generate-high-velocity-data.sh` script outside the TUI
* `/tmp/benchmarker-tui.log` -- Generated by events in the TUI
* `/tmp/dynamodb-population.log` -- Generated whenever you run the `randomly-generate-high-velocity-data.sh` script from the TUI
+14
View File
@@ -0,0 +1,14 @@
[defaults]
forks = 50
gathering = explicit
host_key_checking = False
nocows = 1
retry_files_enabled = False
roles_path = ./ansible/roles
timeout = 60
callback_whitelist = profile_tasks
[callback_profile_tasks]
sort_order = none
output_limit = 1000
+322
View File
@@ -0,0 +1,322 @@
# Benchmarking Ansible Automation
This folder houses all the [Ansible](https://www.ansible.com/) roles to automate the configuration of your local
environment and to deploy the necessary DynamoDB and DAX components to AWS. AWS Deployments leverage
[AWS CDK](https://aws.amazon.com/cdk/) to automate the provisioning of AWS resources. For more information,
navigate to the [CDK directory](../cdk/README.md).
To just see how to run different plays and their corresponding commands without knowing how it all works together,
skip down to the [Plays](#plays) section below.
Note that if no `ssh_key_name` is provided, the default value is `$USER-dax-pair`
## Prerequisites
* You must be logged into the AWS CLI prior to running the CDK. Ensure you're logged into your target AWS account by running
`aws sts get-caller-identity`.
* Install pip (Assuming python3 is already installed): `sudo apt-get install python3-pip`
* Install the most recent version of Ansible and jmespath from pip: `pip3 install --user ansible jmespath`
* Export the local bin path: `export PATH=~/.local/bin:$PATH`
* Install curl (`sudo apt-get install curl`)
* Install the required Ansible dependencies using Ansible Galaxy (`ansible-galaxy install -r requirements.yml`)
## Initializing the Stack
To initialize the stack (including the local Elastic Stack), run the `deploy_benchmarker.yml` playbook with the `init` tag:
```shell
ansible-playbook -i inventories/local \
--tags init \
--ask-become-pass \
deploy_benchmarker.yml
```
## Deploying the Stack
To deploy the entire benchmarking stack all at once, local and AWS, use the following command:
```shell
ansible-playbook -i inventories/local \
-e vpc_id={{ vpc_id_to_deploy_into }} \
deploy_benchmarker.yml
```
The same prerequisites apply to the CDK with the necessary environment or CDK parameters as is defined in the
[CDK Parameters](../cdk/README.md#cdk-arguments) section of the CDK README. Ansible will only resolve the following variables
for you; all other variables must be supplied by the user at runtime:
* `localIp`
* `awsAccount`
## Running the benchmarkers
To run the benchmarkers, run the following command:
```shell
ansible-playbook -i inventories/local \
-e dax_endpoint={{ the_dax_endpoint_uri }} \
run_benchmarkers.yml
```
### Ansible Command Breakdown
Let's analyze how an ansible command is formed:
```shell
ansible-playbook -i inventories/local \
-e vpc_id={{ vpc_id_to_deploy_into }} \
--ask-become-pass \
deploy_benchmarker.yml
```
`ansible-playbook` is the program that runs our playbook, `deploy_benchmarker.yml`. [Playbooks](https://docs.ansible.com/ansible/latest/user_guide/playbooks_intro.html)
are the main "blueprints" of automation tasks that Ansible uses.
`-i inventories/local` tells Ansible that we want to use the hosts and variables associated
with the `local` environment. So later in the playbook and
[roles](https://docs.ansible.com/ansible/latest/user_guide/playbooks_reuse_roles.html), when we're
using variables and hosts, we're pulling the corresponding values for this environment. More
information about inventories in Ansible can be found
[here](https://docs.ansible.com/ansible/2.3/intro_inventory.html). Inventories would be a good place
to start learning about Ansible if you're confused by what's happening in this module.
[This](./inventories/local/host_vars/localhost.yml) is where you'd put variables to persist between runs of this application.
By default, they are only provided for you if you follow the steps in the main repository script.
`-e vpc_id={{ vpc_id_to_deploy_into }}` is setting an extra variable for the playbook to use (fun fact: `-e` is an alias
for `--extra-vars`). This variable is not defined by default in your [local host vars](./inventories/local/host_vars/localhost.yml) because
we don't know what VPC you want to deploy the stack into. If you're running this using the main TUI script in the root
of this repo, then this is handled graphically for you. This will be set on the first run of the CDK deployment, so you do not have to specify
the `vpc_id` between subsequent runs. Otherwise, if you wish to change the VPC ID for any reason (including prior to an initial run), and
you wish to run this Ansible playbook manually, you can add it to your host vars file.
`--ask-become-pass` is telling Ansible to prompt you for your sudo password, so it can run installs and other configuration tasks on your behalf.
`deploy_benchmarker.yml` is the name of our playbook that we want Ansible to run.
## Using Tags to Control What is Deployed
Each part of the `deploy_benchmarker.yml` playbook has
[tags](https://docs.ansible.com/ansible/latest/user_guide/playbooks_tags.html) associated with them.
These tags allow us to tell Ansible which part(s) of the playbook we want to run. In other words, tags
allow us to tell Ansible which parts of the overall Logstash deployment pipeline we want to run.
The `deploy_benchmarker.yml` playbook (and a couple of roles) has the following tags in it:
* `init`
* `init_elk`
* `stop_elk`
* `prerequisites`
* `elk`
* `cdk`
* `run`
* `deploy`
* `destroy`
* `destroy_key_pair`
* `upload`
* `dynamodb`
* `dax`
* `crud`
* `read-only`
To view all these tags and their associated plays from the `ansible` CLI, run
```shell
ansible-playbook deploy_benchmarker.yml --list-tags
```
Using these tags, we can specify that we only want to run specific parts of the Benchmarking Deployment pipeline that's
defined in the `deploy_benchmarker.yml` playbook.
For example: If we only wanted to start the ELK (Elasticsearch-Logstash-Kibana) stack, we would run this:
```shell
ansible-playbook -i inventories/local --tags elk deploy_benchmarker.yml
```
Likewise, if we wanted to stop the ELK stack, we'd run this:
```shell
ansible-playbook -i inventories/local --tags stop_elk deploy_benchmarker.yml
```
Note the `--tags` argument. This allows us to tell Ansible to only run tasks or roles that have the
`elk` or `stop_elk` tag on them.
We can also specify multiple arguments for `--tags` if we wish; for example, if we wanted to simply spin up the local
Elastic stack (synonymous with ELK stack), and deploy the CDK, we'd run the following:
```shell
ansible-playbook -i inventories/local -e vpc_id=vpc-1234567890 --tags 'elk,cdk' deploy_benchmarker.yml
```
## Plays
The following plays can be run from these playbooks using the tags with the following commands:
#### Initialize Your Local Environment and Elastic Stack
A sudo password is required to install applications, so we tell Ansible to prompt us for it at the start:
```shell
ansible-playbook -i inventories/local --tags init deploy_benchmarker.yml --ask-become-pass
```
#### Deploy CDK and Run the Benchmarkers on the Bastion Host
This assumes you already know the VPC ID to deploy into and have already created an SSH key pair and have the key pair
locally in your `~/.ssh` directory with a `.pem` extension.
If you did not do this manually, it was done for you automatically and the created pair is under `~/.ssh/$USER-dax-pair.pem`.
You can either specify the `vpc_id` argument directly via `-e` in the command, or you can hard code
it in your [host_vars](./inventories/local/host_vars/localhost.yml). You must also already be logged into the AWS CLI for
your target environment, or specify a `profile_id` either in your `host_vars` or via `-e`, along with an `aws_region`. If you're not
already logged into AWS, your `profile_id` must be configured to be picked up automatically from your `~/.aws/config` or
`~/.aws/credentials` files with no additional login steps in order to deploy to AWS.
```shell
ansible-playbook -i inventories/local -e vpc_id=vpc-1234567890 --tags deploy deploy_benchmarker.yml
```
#### Shut Down Your Local Elastic Stack
```shell
ansible-playbook -i inventories/local --tags stop_elk deploy_benchmarker.yml
```
#### Wipe Away everything
Once more, this assumes you either have the DAX
endpoint and the VPC ID hardcoded in your [host vars](./inventories/local/host_vars/localhost.yml), or you provide them via `-e`.
If you've already run a CDK deploy via Ansible, then you should not need to specify anything.
**Note:** For safety purposes, this will _not_ wipe away the `ssh_key_name` in your `~/.ssh` directory. If you specified
a pre-existing key to use for this deployment, it will not be touched. If you did not specify a key name, the automatically
generated key `$USER-dax-pair` will be left in your `~/.ssh` directory. If you wish to delete this pair from your local machine
and remove it from AWS, also specify the `destroy_key_pair` tag as well in the below command.
You can either specify the `vpc_id` argument directly via `-e` in the command, or you can hard code
it in your [host_vars](./inventories/local/host_vars/localhost.yml). You must also already be logged into the AWS CLI for
your target environment, or specify a `profile_id` either in your `host_vars` or via `-e`, along with an `aws_region`. If you're not
already logged into AWS, your `profile_id` must be configured to be picked up automatically from your `~/.aws/config` or
`~/.aws/credentials` files with no additional login steps in order to deploy to AWS.
**Destroy Everything, But Leave the ssh_key_name Key-Pair Alone:**
```shell
ansible-playbook -i inventories/local -e vpc_id=vpc-1234567890 --tags destroy deploy_benchmarker.yml
```
**Destroy Everything, Including the ssh_key_name Key-Pair**
```shell
ansible-playbook -i inventories/local -e vpc_id=vpc-1234567890 --tags 'destroy,destroy_key_pair' deploy_benchmarker.yml
```
### Additional Plays You Can Run
#### Only Install Prerequisites for Local Machine
A sudo password is required to install applications, so we tell Ansible to prompt us for it at the start:
```shell
ansible-playbook -i inventories/local --tags prerequisites deploy_benchmarker.yml --ask-become-pass
```
#### Start Your Local Elastic Stack
```shell
ansible-playbook -i inventories/local --tags elk deploy_benchmarker.yml
```
#### Just Deploy the CDK
This assumes you already know the VPC ID to deploy into and have already created an SSH key pair and have the key pair
locally in your `~/.ssh` directory with a `.pem` extension. If you did not do this manually, it was done for you automatically
and the created pair is under `~/.ssh/$USER-dax-pair.pem`. You can either specify the `vpc_id`
argument directly via `-e` in the command, or you can hard code it in your [host_vars](./inventories/local/host_vars/localhost.yml).
If you've already run a CDK deploy via Ansible, then you should not need to specify anything.
You must also already be logged into the AWS CLI for your target environment, or specify a `profile_id` either in your
`host_vars` or via `-e`, along with an `aws_region`. If you're not already logged into AWS, your `profile_id` must be
configured to be picked up automatically from your `~/.aws/config` or `~/.aws/credentials` files with no additional
login steps in order to deploy to AWS.
```shell
ansible-playbook -i inventories/local --tags cdk deploy_benchmarker.yml
```
#### Only Upload the Benchmarkers to the Bastion Host
```shell
ansible-playbook -i inventories/local --tags upload deploy_benchmarker.yml
```
#### Run All Benchmarkers and Scenarios
This assumes the CDK is already deployed and an EC2 instance already exists. This also assumes you either have the DAX
endpoint and the VPC ID hardcoded in your [host vars](./inventories/local/host_vars/localhost.yml), or you provide them via `-e`.
If you've already run a CDK deploy via Ansible, then you should not need to specify anything.
Additionally, you must already be logged into the AWS CLI for
your target environment, or specify a `profile_id` either in your `host_vars` or via `-e`, along with an `aws_region`. If you're not
already logged into AWS, your `profile_id` must be configured to be picked up automatically from your `~/.aws/config` or
`~/.aws/credentials` files with no additional login steps in order to deploy to AWS:
```shell
ansible-playbook -i inventories/local --tags run run_benchmarkers.yml
```
#### Only Run the DynamoDB/DAX Benchmarker
This assumes the CDK is already deployed and an EC2 instance already exists. This also assumes you either have the DAX
endpoint and the VPC ID hardcoded in your [host vars](./inventories/local/host_vars/localhost.yml), or you provide them via `-e`.
If you've already run a CDK deploy via Ansible, then you should not need to specify anything.
Additionally, you must already be logged into the AWS CLI for
your target environment, or specify a `profile_id` either in your `host_vars` or via `-e`, along with an `aws_region`. If you're not
already logged into AWS, your `profile_id` must be configured to be picked up automatically from your `~/.aws/config` or
`~/.aws/credentials` files with no additional login steps in order to deploy to AWS:
```shell
ansible-playbook -i inventories/local --tags dynamodb deploy_benchmarker.yml
```
or
```shell
ansible-playbook -i inventories/local --tags dax deploy_benchmarker.yml
```
Note the difference in tags: `dynamodb` and `dax`
#### Only Run the Benchmarkers in CRUD/READONLY mode
This assumes the CDK is already deployed and an EC2 instance already exists. This also assumes you either have the DAX
endpoint and the VPC ID hardcoded in your [host vars](./inventories/local/host_vars/localhost.yml), or you provide them via `-e`.
If you've already run a CDK deploy via Ansible, then you should not need to specify anything.
Additionally, you must already be logged into the AWS CLI for
your target environment, or specify a `profile_id` either in your `host_vars` or via `-e`, along with an `aws_region`. If you're not
already logged into AWS, your `profile_id` must be configured to be picked up automatically from your `~/.aws/config` or
`~/.aws/credentials` files with no additional login steps in order to deploy to AWS:
**CRUD:**
```shell
ansible-playbook -i inventories/local --tags crud deploy_benchmarker.yml
```
**read-only:**
```shell
ansible-playbook -i inventories/local --tags read-only deploy_benchmarker.yml
```
## Supported Variables
The following variables are supported to be specified via the `-e` argument when running the `deploy_benchmarker.yml`
playbook:
| Variable Name | Description | Required? |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------------|-----------|
| `profile_id` | The name of the AWS CLI profile you wish to deploy with; <br>Defaults to using the `AWS_PROFILE` environment variable | |
| `vpc_id` | The ID of the VPC in the AWS account you're deploying to where you want the CDK components created <br>Only required on first run only | * |
| `local_ip` | The public IP of your local machine; <br>Defaults to the response from `curl -s -L checkip.amazonaws.com` | |
| `ssh_key_name` | The name of the SSH key-pair that will be used when creating the EC2 instance to allow you SSH access to it; <br>Defaults to `$USER-dax-pair` | |
| `aws_account` | The account ID of the AWS account you're deploying into; <br>Defaults to the result of `aws sts get-caller-identity \| jq -r .Account` | |
| `base_table_name` | The base name to use when creating the DynamoDB table; <br>Defaults to `high-velocity-table` | |
| `cdk_action` | The action to perform when deploying the CDK; <br>Defaults to `deploy` | |
| `duration` | How long to run each simulation for; <br>Defaults to 1800 seconds | |
| `benchmarker` | Which benchmarker to run (i.e. `dynamodb` or `dax`) | |
| `dax_endpoint` | The DAX URI to use to hit the DAX cluster; <br>Only required when running the benchmarkers and without an initial CDK deploy) | * |
## Run Order
When first running from scratch, you'll want to run with the `init` tags first to initialize the Elastic Stack and install the prerequisites, then run again without any tags to actually
deploy everything and run the benchmarkers. If you only want to run the benchmarkers, run the `run_benchmarkers.yml` playbook, or specify the `run` tag.
## Troubleshooting
You can generally get more information about your problem by adding `-vvv` to the end of your
`ansible-playbook` command. The more `v`'s you add, the more verbose the output and the more information
you will get. For example:
```shell
ansible-playbook -i inventories/local -e cdk_action=destroy --tags 'elk,cdk' deploy_benchmarker.yml -vvv
```
+41
View File
@@ -0,0 +1,41 @@
# Playbook: deploy the full benchmarking environment.
# Play 1 runs locally: installs prerequisites, configures the Elastic Stack,
# deploys (or destroys) the CDK stack, seeds DynamoDB, and builds the
# benchmarker binaries via the Makefile.
- name: Deploy the benchmarking components
  connection: local
  hosts: local
  gather_facts: yes
  roles:
    # `never` keeps init-only roles from running unless their tag is requested.
    - { role: install_prerequisites, tags: [ never, prerequisites, init ] }
    - { role: configure_elastic_stack, tags: elk }
    - { role: deploy_cdk, tags: [ cdk, deploy ] }
    # Overrides cdk_action so the re-imported deploy_cdk role runs `cdk destroy`.
    - { role: destroy, tags: [ never, destroy ], cdk_action: destroy }
  tasks:
    - name: Populate the DynamoDB table with random data
      shell:
        chdir: ../scripts
        cmd: ./randomly-generate-high-velocity-data.sh -i 5000  # -i 5000: item count to insert
      tags: deploy
    - name: Build the benchmarkers using the Makefile
      shell:
        chdir: ../
        cmd: make build
      tags: deploy

# Play 2 targets the bastion host created by the CDK: copies both benchmarker
# binaries up so the run_benchmarkers playbook can execute them.
- name: Upload the benchmarkers to the bastion host
  hosts: bastion
  gather_facts: yes
  vars:
    # Reuse the key name resolved on localhost so SSH uses the generated pem.
    ssh_key_name: "{{ hostvars['localhost']['ssh_key_name'] }}"
    ansible_ssh_private_key_file: "~/.ssh/{{ ssh_key_name }}.pem"
    ansible_ssh_common_args: '-o StrictHostKeyChecking=no'  # host key changes on every re-deploy
  remote_user: ec2-user
  tags: [ upload, deploy ]
  tasks:
    - copy:
        src: "../{{ item }}"
        dest: .
        mode: 0777  # NOTE(review): unquoted octal mode; quoting it (e.g. '0755') is safer — confirm
      loop:
        - dynamodb-benchmarker
        - dax-benchmarker

# Finally chain into the playbook that actually runs the benchmarks.
- import_playbook: run_benchmarkers.yml
@@ -0,0 +1,8 @@
# Default host variables for localhost; any of these can be overridden with
# `-e` on the ansible-playbook command line.
user_name: "{{ lookup('env', 'USER') }}"
ssh_key_name: "{{ lookup('env', 'USER') }}-dax-pair"
profile_id: "{{ lookup('env', 'AWS_PROFILE') }}"
aws_region: "{{ lookup('env', 'AWS_REGION') }}"
stack_name: "{{ user_name }}-dax-benchmark-stack"
# The remaining values are filled in by the deploy_cdk role (via lineinfile)
# or supplied by the user on first run.
vpc_id:
base_table_name:
dax_endpoint:
+3
View File
@@ -0,0 +1,3 @@
# Local inventory; a `bastion` group is appended by the deploy_cdk role
# once the EC2 instance exists.
local:
  hosts:
    localhost:
+4
View File
@@ -0,0 +1,4 @@
---
# Ansible Galaxy collections required by the playbooks:
# community.general (json_query filter) and amazon.aws (ec2_key,
# cloudformation_info modules).
collections:
  - name: community.general
  - name: amazon.aws
File diff suppressed because one or more lines are too long
@@ -0,0 +1,32 @@
# configure_elastic_stack init tasks: clone, build, and start the docker-elk
# project, then import the benchmarking dashboards into Kibana.
- name: Clone the docker-elk repo
  git:
    repo: https://github.com/deviantony/docker-elk.git
    dest: ../../docker-elk
  ignore_errors: yes  # best-effort: the repo may already be cloned
- name: Build the docker-elk stack just in case a pre-existing version of Elasticsearch needs its nodes upgraded
  shell:
    chdir: ../../docker-elk
    cmd: docker compose build
- name: Start the docker-elk setup container
  shell:
    chdir: ../../docker-elk
    # FIX: use the Compose v2 plugin (`docker compose`) like every other task
    # and the Makefile; the legacy `docker-compose` v1 binary may be absent.
    cmd: docker compose up setup
- name: Start the docker-elk stack
  shell:
    chdir: ../../docker-elk
    cmd: docker compose up -d
- name: Wait 20 seconds for the ELK stack to start
  pause:
    seconds: 20
- name: Import the benchmarking dashboards into Kibana
  shell:
    cmd: >
      curl -X POST http://localhost:5601/api/saved_objects/_import?overwrite=true
      -H 'kbn-xsrf: true'
      -u 'elastic:changeme'
      --form file=@roles/configure_elastic_stack/files/benchmarker-dashboards.ndjson
@@ -0,0 +1,8 @@
# Entry point for the configure_elastic_stack role.
# The init and stop task files only run when their tags are explicitly requested.
- { import_tasks: init_elk_stack.yml, tags: [ never, init, init_elk ] }
- { import_tasks: stop_elk_stack.yml, tags: [ never, stop_elk ] }
- name: Start the docker-elk stack
  shell:
    chdir: ../../docker-elk
    cmd: docker compose up -d
  tags: deploy
@@ -0,0 +1,4 @@
# Stop the local Elastic Stack containers (volumes are preserved; the
# destroy role uses `down -v` to remove them).
- name: Stop the docker-elk stack
  shell:
    chdir: ../../docker-elk
    cmd: docker compose down
+119
View File
@@ -0,0 +1,119 @@
# deploy_cdk role: ensure an SSH key-pair exists, bootstrap + deploy the CDK
# stack, then persist the stack outputs (bastion IP, DAX endpoint) into the
# local inventory so later plays can use them.
# Every step is skipped under the `destroy` tag, because this role is also
# re-imported by the destroy role with cdk_action=destroy.
- name: Check if a key-pair following the specified format already exists
  stat:
    path: "{{ ansible_env.HOME }}/.ssh/{{ ssh_key_name }}.pem"
  register: key_pair
  changed_when: no  # pure probe
  when: "'destroy' not in ansible_run_tags"

# Create the AWS key-pair and write its private key locally, but only when
# no matching .pem file exists yet.
- block:
    - name: Create a new key-pair
      ec2_key:
        name: "{{ ssh_key_name }}"
      register: aws_key_pair
    - name: Create the new pem file
      file:
        path: "{{ ansible_env.HOME }}/.ssh/{{ ssh_key_name }}.pem"
        state: touch
        mode: '0400'  # SSH refuses keys that are group/world readable
    - name: Add the generated key-pair to the new file
      blockinfile:
        path: "{{ ansible_env.HOME }}/.ssh/{{ ssh_key_name }}.pem"
        block: "{{ aws_key_pair.key.private_key }}"
  when:
    - "'destroy' not in ansible_run_tags"
    - not key_pair.stat.exists

- name: Fetch the current system's public IP
  shell:
    cmd: curl -s -L checkip.amazonaws.com  # used to restrict SSH/ES ingress to this IP
  register: public_ip_resp
- name: Fetch the current AWS account ID
  shell:
    cmd: aws sts get-caller-identity | jq -r .Account
  register: aws_account_resp
- name: Install CDK dependencies
  npm:
    ci: yes  # npm ci: clean, lockfile-exact install
    path: ../cdk
- name: Bootstrapping the AWS environment
  shell:
    chdir: ../cdk
    # `yes |` auto-confirms any interactive CDK prompts
    cmd: >
      npm run build && yes | npm run cdk bootstrap --
      --no-color --require-approval never
      --profile {{ profile_id | default("personal") }}
      -c vpcId={{ vpc_id }}
      -c localIp={{ public_ip_resp.stdout }}
      -c sshKeyName={{ ssh_key_name }}
      -c awsAccount={{ aws_account_resp.stdout }}
      -c baseTableName={{ base_table_name | default('') }}
- name: Deploying Benchmarking CDK
  shell:
    chdir: ../cdk
    # cdk_action defaults to deploy; the destroy role overrides it to destroy
    cmd: >
      npm run build && yes | npm run cdk {{ cdk_action | default("deploy") }} --
      --no-color --require-approval never
      --profile {{ profile_id | default("personal") }}
      -c vpcId={{ vpc_id }}
      -c localIp={{ public_ip_resp.stdout }}
      -c sshKeyName={{ ssh_key_name }}
      -c awsAccount={{ aws_account_resp.stdout }}
      -c baseTableName={{ base_table_name | default('') }}
  register: cdk_response
- name: Benchmarking CDK deployment summary
  debug:
    msg: "{{ cdk_response.stderr_lines }}"  # the CDK CLI writes its progress to stderr

# Read the stack outputs back and persist them for the upload/run plays.
- block:
    - name: Fetch the benchmark stack outputs
      cloudformation_info:
        stack_name: "{{ stack_name }}"
      register: benchmark_stack
    - name: Extracting the bastion host IP
      set_fact:
        bastion_host_ip: "{{ benchmark_stack.cloudformation[stack_name].stack_outputs['InstancePublicIp'] }}"
    - name: Extracting DAX endpoint
      set_fact:
        dax_endpoint: "{{ benchmark_stack.cloudformation[stack_name].stack_outputs['DaxEndpoint'] }}"
    - name: Setting the dax_endpoint variable in the host vars if it doesn't exist already
      lineinfile:
        path: inventories/local/host_vars/localhost.yml
        line: "dax_endpoint: {{ dax_endpoint }}"
        regexp: '^dax_endpoint:'
    - name: Setting the vpc_id variable in the host vars if it doesn't exist already
      lineinfile:
        path: inventories/local/host_vars/localhost.yml
        line: "vpc_id: {{ vpc_id }}"
        regexp: '^vpc_id:'
    # Register the bastion both persistently (hosts.yml) and for this run (add_host).
    - block:
        - name: Setting the bastion host IP if it doesnt exist in the inventory
          lineinfile:
            path: inventories/local/hosts.yml
            line: |
              bastion:
                hosts:
                  {{ bastion_host_ip }}:
            regexp: 'bastion:\n\s*hosts:\n\s*(?:\d{1,3}\.){3}\d{1,3}:'
            insertafter: EOF
        - name: Add the bastion host to the bastion group
          add_host:
            name: "{{ bastion_host_ip }}"
            groups: bastion
      when:
        - "'bastion' not in groups"
        - "'bastion' not in group_names"
  when: "'destroy' not in ansible_run_tags"
+54
View File
@@ -0,0 +1,54 @@
# destroy role: tear down the local Elastic Stack, run `cdk destroy` (by
# re-importing deploy_cdk; the playbook overrides cdk_action to destroy),
# optionally delete the SSH key-pair, and reset the inventory variables.
- name: Wipe away local Elastic Stack
  shell:
    chdir: ../../docker-elk
    cmd: docker compose down -v  # -v also removes the data volumes
  ignore_errors: yes
- name: Wipe away the ELK directory
  file:
    path: ../../docker-elk
    state: absent
  ignore_errors: yes
- name: Run CDK Destroy
  import_role:
    name:
      deploy_cdk
# The two key-pair tasks only run when the destroy_key_pair tag is requested.
- name: Delete the key-pair from AWS
  ec2_key:
    name: "{{ ssh_key_name }}"
    state: absent
  ignore_errors: yes
  tags: [ never, destroy_key_pair ]
- name: Delete the key pair from your local machine
  file:
    path: "{{ ansible_env.HOME }}/.ssh/{{ ssh_key_name }}.pem"
    state: absent
  ignore_errors: yes
  tags: [ never, destroy_key_pair ]
- name: Remove the bastion host from the bastion host group
  replace:
    path: inventories/local/hosts.yml
    replace: ''
    regexp: '^bastion:\n\s*hosts:\n\s*(?:\d{1,3}\.){3}\d{1,3}:'
- name: Reset the dax_endpoint variable in the host vars
  lineinfile:
    path: inventories/local/host_vars/localhost.yml
    line: 'dax_endpoint:'
    regexp: '^dax_endpoint:'
- name: Reset the vpc_id variable in the host vars
  lineinfile:
    path: inventories/local/host_vars/localhost.yml
    line: 'vpc_id:'
    regexp: '^vpc_id:'
- name: Clean the repository using the Makefile
  shell:
    chdir: ../
    cmd:
      make clean
@@ -0,0 +1,22 @@
# Install Docker (engine, CLI, compose v1 binary + v2 plugin) and the misc
# CLI tools the playbooks rely on, from Docker's official APT repository.
- name: Add Docker's official GPG key
  apt_key:
    url: https://download.docker.com/linux/ubuntu/gpg
    keyring: /etc/apt/keyrings/docker.gpg
- name: Set up docker APT repository
  apt_repository:
    repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
- name: Install the required APT dependencies
  apt:
    update_cache: yes
    name:
      - docker-ce
      - docker-ce-cli
      - docker-compose
      - containerd.io
      - docker-compose-plugin
      - jq
      - unzip
      - curl
      - git
@@ -0,0 +1,26 @@
# Install the AWS CLI v2 from the official bundle, but only when no `aws`
# binary is already on PATH.
- name: Check if AWS CLI is installed
  shell:
    cmd: hash aws 2> /dev/null
  ignore_errors: yes  # a non-zero rc just means "not installed"
  changed_when: no    # pure probe
  register: awscli_installation_status
- block:
    - name: Download the AWS CLI from AWS
      unarchive:
        src: https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip
        dest: "{{ ansible_env.HOME }}/Downloads"
        group: "{{ user_name }}"
        owner: "{{ user_name }}"
        remote_src: yes  # fetch the archive directly rather than copying a local file
    - name: Install the AWS CLI
      shell:
        cmd: "{{ ansible_env.HOME }}/Downloads/aws/install"
    - name: Cleanup downloaded AWS installation files
      file:
        path: "{{ ansible_env.HOME }}/Downloads/aws/"
        state: absent
  when: awscli_installation_status.rc | int != 0
@@ -0,0 +1,15 @@
# Install Go 1.20 under /usr/local when no `go` binary is already on PATH.
- name: Check if Go is installed
  shell:
    cmd: command -v go 2> /dev/null
  ignore_errors: yes  # non-zero rc means "not installed"
  changed_when: no    # pure probe
  register: go_installation_status
- name: Install Go 1.20
  unarchive:
    src: https://go.dev/dl/go1.20.5.linux-amd64.tar.gz
    dest: /usr/local
    creates: /usr/local/go  # idempotence guard
    remote_src: yes
  become: yes  # /usr/local requires root
  when: go_installation_status.rc | int != 0
@@ -0,0 +1,25 @@
# install_prerequisites role entry point: language toolchains, Docker/APT
# packages, the CDK CLI, and golangci-lint.
- { import_tasks: aws_cli.yml, become: yes }
- import_tasks: rust.yml
- import_tasks: go.yml
- import_tasks: node.yml
- { import_tasks: apt.yml, become: yes }
- name: Install CDK
  npm:
    name: "{{ item }}"
    global: yes
  loop:
    - aws-cdk
    - typescript
- name: Check if golangci-lint is installed
  shell:
    cmd: command -v golangci-lint 2> /dev/null
  ignore_errors: yes  # non-zero rc means "not installed"
  changed_when: no    # pure probe
  register: golangci_lint_installation_status
- name: Install golangci-lint
  shell:
    # pinned to v1.53.3, installed into /usr/local/bin per the official installer
    cmd: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b /usr/local/bin v1.53.3
  when: golangci_lint_installation_status.rc | int != 0
@@ -0,0 +1,34 @@
# Install Node.js via nvm when `node` is not already on PATH, and make nvm
# available in future shells by appending its init lines to ~/.bashrc.
- name: Check if node is installed
  shell:
    cmd: hash node 2> /dev/null
  ignore_errors: yes  # non-zero rc means "not installed"
  changed_when: no    # pure probe
  register: node_installation_status
- block:
    - name: Install nvm
      shell: >
        curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash
      args:
        creates: "{{ ansible_env.HOME }}/.nvm/nvm.sh"  # skip if nvm is already present
    - name: Install Node.JS
      shell:
        # nvm is a shell function, so it must be sourced in this same shell
        # before `nvm install` can run.
        cmd: |
          export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"
          [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
          nvm install node
    - name: Add NVM exports to bashrc
      lineinfile:
        path: "{{ ansible_env.HOME }}/.bashrc"
        line: 'export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"'
        regexp: '^export NVM_DIR=.+'
    - name: Add NVM script to bashrc
      lineinfile:
        path: "{{ ansible_env.HOME }}/.bashrc"
        line: '[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"'
        regexp: '\[ -s |\$NVM_DIR/nvm\.sh \].+'
  when: node_installation_status.rc | int != 0
@@ -0,0 +1,11 @@
# Install the Rust toolchain via rustup when rustup is not already on PATH.
- name: Check if rustup is installed
  shell:
    cmd: command -v rustup 2> /dev/null
  ignore_errors: yes  # non-zero rc means "not installed"
  changed_when: no    # pure probe
  register: rustup_installation_status
- name: Install Rust via Rustup
  shell: >
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
  when: rustup_installation_status.rc | int != 0
+110
View File
@@ -0,0 +1,110 @@
# Playbook: run the DynamoDB and DAX benchmarkers on the bastion host.
# Play 1 exports the local AWS credentials as a fact; Play 2 injects them
# into the remote environment and runs the selected benchmarker binaries.
- name: Get AWS Credentials
  connection: local
  hosts: local
  gather_facts: yes
  tags: [ run, deploy ]
  tasks:
    - name: Ensure the user is logged into their AWS CLI
      assert:
        that:
          - aws_region is defined
          - profile_id is defined
          - dax_endpoint is defined
    - name: Get the environment variables to set on the bastion host for the current AWS profile
      shell:
        cmd: aws configure export-credentials
      register: aws_creds
    - name: Register the aws_creds as a fact for the benchmarkers playbook to receive
      set_fact:
        # NOTE(review): relies on Ansible auto-parsing the JSON string into a
        # dict for json_query; an explicit `| from_json` would be more robust.
        aws_credentials: "{{ aws_creds.stdout }}"

- name: Run the benchmarkers
  hosts: bastion
  gather_facts: no
  vars:
    ssh_key_name: "{{ hostvars['localhost']['ssh_key_name'] }}"
    ansible_ssh_private_key_file: "~/.ssh/{{ ssh_key_name }}.pem"
    # -R 9200: reverse-tunnel the local Elasticsearch port so the remote
    # benchmarkers can publish metrics back to the local Elastic Stack.
    ansible_ssh_common_args: '-o StrictHostKeyChecking=no -R 9200:localhost:9200'
  tags: [ run, deploy ]
  remote_user: ec2-user
  tasks:
    # BUG FIX (all four tasks): the original exported AWS_ACCESS_KEY, but the
    # AWS SDKs read AWS_ACCESS_KEY_ID, so the benchmarkers would fail to
    # resolve credentials from the environment.
    - name: Run the DynamoDB benchmarker in CRUD mode
      shell:
        cmd: >
          export AWS_ACCESS_KEY_ID="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('AccessKeyId') }}";
          export AWS_SECRET_ACCESS_KEY="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('SecretAccessKey') }}";
          export AWS_SESSION_TOKEN="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('SessionToken') }}";
          export AWS_CREDENTIAL_EXPIRATION="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('Expiration') }}";
          export AWS_REGION="{{ hostvars['localhost']['aws_region'] }}";
          ./dynamodb-benchmarker -d "{{ duration | default(1800) | int }}" -t "{{ hostvars['localhost']['user_name'] }}"-high-velocity-table
        executable: /bin/bash
      tags:
        - dynamodb
        - crud
    - name: Run the DynamoDB benchmarker in read-only mode
      shell:
        cmd: >
          export AWS_ACCESS_KEY_ID="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('AccessKeyId') }}";
          export AWS_SECRET_ACCESS_KEY="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('SecretAccessKey') }}";
          export AWS_SESSION_TOKEN="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('SessionToken') }}";
          export AWS_CREDENTIAL_EXPIRATION="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('Expiration') }}";
          export AWS_REGION="{{ hostvars['localhost']['aws_region'] }}";
          ./dynamodb-benchmarker -d "{{ duration | default(1800) | int }}" -t "{{ hostvars['localhost']['user_name'] }}"-high-velocity-table -r
        executable: /bin/bash
      tags:
        - dynamodb
        - read-only
    # The DAX tasks launch 10 parallel copies of the short-lived (-d 115)
    # benchmarker in a restart loop, terminated by `timeout` after `duration`.
    - name: Run the DAX benchmarker in CRUD mode
      shell:
        cmd: >
          export AWS_ACCESS_KEY_ID="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('AccessKeyId') }}";
          export AWS_SECRET_ACCESS_KEY="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('SecretAccessKey') }}";
          export AWS_SESSION_TOKEN="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('SessionToken') }}";
          export AWS_CREDENTIAL_EXPIRATION="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('Expiration') }}";
          export AWS_REGION="{{ hostvars['localhost']['aws_region'] }}";
          export DAX_ENDPOINT="{{ hostvars['localhost']['dax_endpoint'] }}";
          unset cmd;
          basecmd='./dax-benchmarker -c 100
          -d 115
          -t "{{ hostvars['localhost']['user_name'] }}"-high-velocity-table
          -e "{{ hostvars['localhost']['dax_endpoint'] }}"';
          for i in $(seq 1 9); do
          cmd+="$basecmd & ";
          done;
          cmd+="$basecmd";
          timeout -s SIGINT "{{ duration | default(1800) | int }}" bash -c "while :; do $cmd; done"
        executable: /bin/bash
      ignore_errors: yes  # timeout exits non-zero by design when the duration elapses
      tags:
        - dax
        - crud
    - name: Run the DAX benchmarker in read-only mode
      shell:
        cmd: >
          export AWS_ACCESS_KEY_ID="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('AccessKeyId') }}";
          export AWS_SECRET_ACCESS_KEY="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('SecretAccessKey') }}";
          export AWS_SESSION_TOKEN="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('SessionToken') }}";
          export AWS_CREDENTIAL_EXPIRATION="{{ hostvars['localhost']['aws_credentials'] | community.general.json_query('Expiration') }}";
          export AWS_REGION="{{ hostvars['localhost']['aws_region'] }}";
          export DAX_ENDPOINT="{{ hostvars['localhost']['dax_endpoint'] }}";
          unset cmd;
          basecmd='./dax-benchmarker -c 100
          -d 115
          -r
          -t "{{ hostvars['localhost']['user_name'] }}"-high-velocity-table
          -e "{{ hostvars['localhost']['dax_endpoint'] }}"';
          for i in $(seq 1 9); do
          cmd+="$basecmd & ";
          done;
          cmd+="$basecmd";
          timeout -s SIGINT "{{ duration | default(1800) | int }}" bash -c "while :; do $cmd; done"
        executable: /bin/bash
      ignore_errors: yes  # timeout exits non-zero by design when the duration elapses
      tags:
        - dax
        - read-only
Executable
+264
View File
@@ -0,0 +1,264 @@
#!/bin/bash
# TUI front-end for the DynamoDB + DAX benchmarker Ansible playbooks.
export PATH="$HOME"/.local/bin:$PATH
source scripts/logger.sh /tmp/benchmarker-tui.log
source scripts/ui_utils.sh

ANSIBLE_LOG_FILE=/tmp/ansible-playbook-output.log
# Remove any stale log from a previous run. FIX: run in the foreground with
# -f — backgrounding a near-instant rm gains nothing and races the first
# playbook run, which redirects into this same file.
rm -f "$ANSIBLE_LOG_FILE"
# Verify every tool the TUI depends on is present, installing any that are
# missing via apt/pip. Exits immediately if the user is not logged into the
# AWS CLI, since nothing else in this tool can work without credentials.
verify-prerequisites() {
  log-info "Verifying prerequisites"
  declare prerequisites=(whiptail jq dialog)
  # An authenticated AWS session is mandatory for everything this tool does.
  if ! (aws sts get-caller-identity > /dev/null 2>&1); then
    log-error "Must be logged into AWS CLI to use this script. Log into the target AWS account and run this script again" true
    exit 1
  fi
  # Install any missing TUI helpers via apt (interactive; no -y on purpose).
  for application in "${prerequisites[@]}"; do
    if ! (command -v "$application" > /dev/null 2>&1); then
      log-warn "$application is required to run this script. Installing $application..."
      sudo apt install "$application"
    fi
  done
  # Ansible (and jmespath, needed by the json_query filter) installed per-user via pip.
  if ! (command -v ansible > /dev/null 2>&1); then
    log-warn "Ansible is required to run this script. Installing Ansible..." true
    sudo apt install python3-pip
    pip3 install --user ansible jmespath
  fi
  # Install the galaxy collections the playbooks need if either is absent.
  if ! (ansible-galaxy collection list | grep -i "community.general\|amazon.aws" > /dev/null 2>&1); then
    log-warn "Installing Ansible galaxy requirements..." true
    cd ansible
    ansible-galaxy install -r requirements.yml
    cd -
  fi
}
# Prompt for sudo credentials, then run the `init`-tagged playbook to
# bootstrap the local environment (prerequisites + Elastic Stack init).
initialize-environment() {
  check-sudo-pass "Installing dependencies requires sudo permissions."
  if [[ "$?" == 0 ]]; then
    # SECURITY FIX: the original logged $PASSWORD (the user's sudo password)
    # in plaintext to the world-readable /tmp log file; never log it.
    declare title="Initialize Local Environment"
    if (prompt-yes-no "$title"); then
      cd ansible
      ansible-playbook -i inventories/local -e "ansible_become_password=$PASSWORD" --tags init deploy_benchmarker.yml > "$ANSIBLE_LOG_FILE" 2>&1 &
      pid=$!
      log-info "Running ansible-playbook 'deploy_benchmarker.yml' with the 'init' tag and logging output to file [$ANSIBLE_LOG_FILE]"
      show-tail-box "$title" $pid "$ANSIBLE_LOG_FILE"
      msg-box "Successfully initialized the local environment!"
      log-info "Successfully initialized the local environment"
      cd -
    fi
  fi
  main-menu
}
# Confirm with the user, then run the full deploy playbook (no tags),
# prompting for a VPC first if one has not been selected yet.
deploy-and-run-benchmarkers() {
  declare title="Deploy and Run Benchmarkers"
  if ! (prompt-yes-no "$title"); then
    main-menu
    return
  fi
  # A VPC is required by the CDK deploy; ask once and cache in $VPC_ID.
  [[ -z $VPC_ID ]] && prompt-for-vpc-id
  cd ansible
  ansible-playbook -i inventories/local -e vpc_id="$VPC_ID" deploy_benchmarker.yml > "$ANSIBLE_LOG_FILE" 2>&1 &
  pid=$!
  log-info "Running ansible-playbook 'deploy_benchmarker.yml' with no tags and logging output to file [$ANSIBLE_LOG_FILE]"
  # Tail the playbook output in a dialog box until the background run finishes.
  show-tail-box "$title" $pid "$ANSIBLE_LOG_FILE"
  msg-box "Successfully deployed and ran benchmarkers!"
  log-info "Successfully deployed and ran benchmarkers"
  cd -
  main-menu
}
# Confirm, then run the playbook with both destroy tags to tear down the AWS
# resources, the local Elastic Stack, and the generated SSH key-pair.
destroy-all() {
  declare title="Destroy Everything (Clean Slate)"
  if (prompt-yes-no "$title"); then
    cd ansible
    ansible-playbook -i inventories/local --tags 'destroy,destroy_key_pair' deploy_benchmarker.yml > "$ANSIBLE_LOG_FILE" 2>&1 &
    pid=$!
    log-info "Running ansible-playbook 'deploy_benchmarker.yml' with [destroy,destroy_key_pair] tags and logging output to file [$ANSIBLE_LOG_FILE]"
    # Tail the playbook output in a dialog box until the background run finishes.
    show-tail-box "$title" $pid "$ANSIBLE_LOG_FILE"
    msg-box "Successfully destroyed everything!"
    log-info "Successfully destroyed everything"
    cd -
  fi
  main-menu
}
# Seed DynamoDB with random data, tailing the population script's own log.
randomly-populate-dynamodb() {
  declare title="Populate DynamoDB with Random Data"
  declare population_log=/tmp/dynamodb-population.log
  if (prompt-yes-no "$title"); then
    ./scripts/randomly-generate-high-velocity-data.sh "$population_log" &
    pid=$!
    # BUG FIX: the original tailed (and mentioned) $ANSIBLE_LOG_FILE, but the
    # population script writes to its own log file, so the tail box was empty.
    log-info "Running randomly-generate-high-velocity-data script and logging to [$population_log]"
    show-tail-box "$title" $pid "$population_log"
    msg-box "Successfully populated DynamoDB with random data!"
    log-info "Successfully populated DynamoDB with random data"
  fi
  main-menu
}
# Advanced mode: let the user pick an arbitrary combination of playbook tags
# via a checklist, then run deploy_benchmarker.yml with exactly those tags.
custom-selections() {
  declare title="Customize What to Run (Advanced Mode)"
  declare choices
  declare -a tag_list=()
  choices=$(whiptail --separate-output --checklist --fb "$title" "$BOX_HEIGHT" "$BOX_WIDTH" 13 \
    "PREREQUISITES" "Install Prerequisites for Local Machine" OFF \
    "INITIALIZE_ELK" "Initialize Local Elastic Stack" OFF \
    "START_ELK" "Start Local Elastic Stack" OFF \
    "DEPLOY_CDK" "Deploy CDK" OFF \
    "UPLOAD_BIN" "Upload Benchmarker binaries" OFF \
    "RUN_BENCHMARKERS" "Run Benchmarkers" OFF \
    "STOP_ELK" "Stop Local Elastic Stack" OFF \
    "DESTROY" "Destroy Everything except the SSH key" OFF \
    "DESTROY_KEY" "Destroy the SSH Key" OFF \
    "RUN_DYNAMODB" "Run the DynamoDB Benchmarkers" OFF \
    "RUN_DAX" "Run the DAX Benchmarkers" OFF \
    "RUN_CRUD" "Run the CRUD benchmarks for both the DynamoDB and DAX benchmarkers" OFF \
    "RUN_READ_ONLY" "Run the READ-ONLY benchmarks for both the DynamoDB and DAX benchmarkers" OFF 3>&2 2>&1 1>&3)
  if [[ -n $choices ]]; then
    # Map each checklist selection to its ansible tag.
    for choice in $choices; do
      case "$choice" in
        "PREREQUISITES") tag_list+=(prerequisites) ;;
        "INITIALIZE_ELK") tag_list+=(init_elk) ;;
        "START_ELK") tag_list+=(elk) ;;
        "DEPLOY_CDK") tag_list+=(cdk) ;;
        "UPLOAD_BIN") tag_list+=(upload) ;;
        "RUN_BENCHMARKERS")
          tag_list+=(run)
          # Running the benchmarkers needs a VPC selection for the inventory.
          if [[ -z $VPC_ID ]]; then
            prompt-for-vpc-id
          fi
          ;;
        "STOP_ELK") tag_list+=(stop_elk) ;;
        "DESTROY") tag_list+=(destroy) ;;
        "DESTROY_KEY") tag_list+=(destroy_key_pair) ;;
        "RUN_DYNAMODB") tag_list+=(dynamodb) ;;
        "RUN_DAX") tag_list+=(dax) ;;
        "RUN_CRUD") tag_list+=(crud) ;;
        "RUN_READ_ONLY") tag_list+=(read-only) ;;
      esac
    done
    # BUG FIX: the original concatenated tags with no separator (e.g. "elkcdk"),
    # so ansible-playbook --tags never matched any of them. Join with commas.
    declare tags
    tags=$(IFS=','; echo "${tag_list[*]}")
    if (prompt-yes-no "$title"); then
      cd ansible
      ansible-playbook -i inventories/local --tags "$tags" deploy_benchmarker.yml > "$ANSIBLE_LOG_FILE" 2>&1 &
      pid=$!
      log-info "Running ansible-playbook 'deploy_benchmarker.yml' with [$tags] tags and logging output to file [$ANSIBLE_LOG_FILE]"
      show-tail-box "$title" $pid "$ANSIBLE_LOG_FILE"
      msg-box "Successfully ran custom tasks!"
      log-info "Successfully ran custom tasks"
      cd -
    fi
  fi
  main-menu
}
# Present a radiolist of the account's VPCs and store the selection in VPC_ID.
prompt-for-vpc-id() {
  # Each array element is "vpc-id name", with the name falling back to "-".
  readarray -t vpc_arr < <(aws ec2 describe-vpcs | jq -r '.Vpcs[] | "\(.VpcId) \((.Tags[]? | select(.Key | contains("Name")) | .Value) // "")"' | awk '{print($1, $2 == "" ? "-" : $2);}')
  declare prompt=""
  for item in "${vpc_arr[@]}"; do
    prompt+="$item OFF "
  done
  # BUG FIX: --radiolist takes `height width list-height tag item status ...`;
  # the original omitted the list-height and also passed "${vpc_arr[@]}" ahead
  # of $prompt, misaligning the tag/item/status triples. $prompt is left
  # unquoted on purpose so it word-splits into those triples.
  VPC_ID=$(whiptail --fb --title "Select VPC" --radiolist "Select which VPC to use to deploy resources into" "$BOX_HEIGHT" "$BOX_WIDTH" "${#vpc_arr[@]}" $prompt 3>&2 2>&1 1>&3)
}
# Top-level menu: show the action list and dispatch to the matching handler.
main-menu() {
  local selection
  selection=$(whiptail --fb --title "DynamoDB + DAX Benchmarker" --menu "Select an action" "$BOX_HEIGHT" "$BOX_WIDTH" 6 \
    "I" "(I)nitialize local environment" \
    "D" "(D)eploy and Run benchmarkers" \
    "W" "(W)ipe away everything (Clean Slate)" \
    "R" "(R)andomly populate DynamoDB" \
    "C" "(C)ustom (Advanced)" \
    "X" "E(x)it" 3>&2 2>&1 1>&3)
  case "$selection" in
    I) initialize-environment ;;
    D) deploy-and-run-benchmarkers ;;
    W) destroy-all ;;
    R) randomly-populate-dynamodb ;;
    C)
      # Warn before exposing the raw tag checklist to the user.
      msg-box "This is for advanced users only! Be sure you know what you're doing, as running some things at the same time can cause problems (like destroy and deploy)!"
      custom-selections
      ;;
    X)
      clear
      exit 0
      ;;
  esac
}
# Entry point: check prerequisites once, then loop the menu forever.
# (Handlers also re-enter main-menu themselves; exit happens via the "X" choice.)
verify-prerequisites
while :; do
  main-menu
done
+8
View File
@@ -0,0 +1,8 @@
*.js
!jest.config.js
*.d.ts
node_modules
# CDK asset staging directory
.cdk.staging
cdk.out
+6
View File
@@ -0,0 +1,6 @@
*.ts
!*.d.ts
# CDK asset staging directory
.cdk.staging
cdk.out
+88
View File
@@ -0,0 +1,88 @@
# DynamoDB + DAX Benchmarker CDK
This CDK project deploys a DynamoDB table with a DAX cluster on top of it, and an EC2 instance to act as a bastion host for running benchmarking tests against DAX.
By default, the name of the DynamoDB table that is created is `$USER-high-velocity-table`.
By default, the name of the SSH key that is created for you is `$USER-dax-pair`
It should be noted that due to a bug in CDK, if you destroy the stack, you'll have to manually delete the SubnetGroup in DAX once everything else is deleted.
## Prerequisites
You must be logged into the AWS CLI prior to running the CDK. Ensure you're logged into your target AWS account by running
`aws sts get-caller-identity`.
## Getting started
[NodeJS](https://nodejs.org/en) is required for development. Install NodeJS using the following commands, if it is
not already installed:
### Installing NodeJS
#### Windows
NodeJS can be installed on Windows using the [Chocolatey](https://chocolatey.org) package manager. If Chocolatey is not yet
installed on your system, first install it in a privileged PowerShell:
```powershell
Set-ExecutionPolicy Bypass -Scope Process -Force;
[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072;
iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
```
Then, in a _non-privileged_ PowerShell session, install node:
```powershell
choco install nodejs
```
#### Linux
NodeJS can be installed on Linux using [NVM](https://github.com/nvm-sh/nvm). First, install NVM:
```shell
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash
```
**Note:** The installation command was _not_ run with `sudo`. This is intentional, because if you install with `sudo`, then
`sudo` permissions will be required to install any and all new dependencies! You should avoid installing Node for the root
user!
Then, in order to use NVM to install NodeJS, you need to either restart your current shell session, or run the following:
```shell
export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
```
Now, install NodeJS:
```shell
nvm install node
```
### Installing dependent libraries
Once node is installed, run the following commands to install the NPM libraries:
```shell
cd cdk
npm install -g aws-cdk
npm install -g typescript
npm install
```
## CDK Arguments
This application depends on a few additional parameters in order to run. They can be specified in one of two ways: environment variables, or via the `-c` argument of the `cdk` command.
**Important:** Only one environment variable is required by the application, regardless of which parameter specification method you choose: `AWS_REGION`.
The following is a table of the **required** parameters for running the CDK
| Parameter Name | Environment Variable Name | Description |
|----------------|---------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `vpcId` | `VPC_ID` | The VPC ID you wish to deploy all of the stack's components into |
| `localIp` | `LOCAL_IP` | Your local IP; Used to allow SSH and Elasticsearch access in the EC2 security group |
| `sshKeyName` | `SSH_KEY_NAME` | The key name of your ssh key to allow you access to your EC2 instance. This should only be the name of the `.pem` file, and should not include the `.pem` extension. |
| `awsAccount` | `AWS_ACCOUNT` | The account ID of your AWS account. |
| `awsRegion` | `AWS_REGION` | The AWS region to deploy this stack and its components into |
### Optional Parameters
It is sometimes necessary to tweak the deployment a bit for different use cases. The CDK can be tweaked with the following parameters:
| Parameter Name | Default Value | Description |
|-----------------|-----------------------|-------------------------------------------------------------------------------------------------------------------------|
| `baseTableName` | `high-velocity-table` | This is the base name for the table. All tables created by the stack will be prefixed with `$USER` to prevent conflicts |
+69
View File
@@ -0,0 +1,69 @@
#!/usr/bin/env node
// CDK entry point: resolves configuration from CDK context (`-c key=value`)
// and environment variables, then instantiates the benchmarking stack.
import 'source-map-support/register';
import * as cdk from 'aws-cdk-lib';
import {EnvironmentProps} from '../lib/types';
import { DaxBenchmarkingStack } from '../lib/dax-benchmarking-stack';

const app = new cdk.App();
const user = process.env.USER || '';

/**
 * Resolve a value from CDK context first, then from the named environment
 * variable; returns undefined when neither is set (empty strings count as unset).
 */
function fromContextOrEnv(contextKey: string, envVar: string): string | undefined {
  return app.node.tryGetContext(contextKey) || process.env[envVar] || undefined;
}

/** Like fromContextOrEnv, but throws `errorMessage` when the value is missing. */
function required(contextKey: string, envVar: string, errorMessage: string): string {
  const value = fromContextOrEnv(contextKey, envVar);
  if (!value) {
    throw new Error(errorMessage);
  }
  return value;
}

// Previously this file repeated the same context-or-env-or-throw pattern five
// times; the helpers above deduplicate it while preserving every message.
const vpcId = required('vpcId', 'VPC_ID',
  'vpcId is a required parameter. Specify it with `-c vpcId=someId`, or by setting the VPC_ID environment variable');
const localIp = required('localIp', 'LOCAL_IP',
  'Local IP is a required parameter. Specify it with `-c localIp=XXX.XXX.XXX.XXX`, or by setting the LOCAL_IP environment variable');
// Falls back to the conventional `$USER-dax-pair` key created by the Ansible playbooks.
const sshKeyName = fromContextOrEnv('sshKeyName', 'SSH_KEY_NAME') ?? `${user}-dax-pair`;
const awsAccount = required('awsAccount', 'AWS_ACCOUNT',
  'awsAccount is a required parameter. Specify it with `-c awsAccount=1234567890`, or by setting the AWS_ACCOUNT environment variable.');
const awsRegion = required('awsRegion', 'AWS_REGION',
  'The `AWS_REGION` environment variable was not set. It must be set in order to use this application.');
// Optional, context-only parameter with a documented default.
const baseTableName = app.node.tryGetContext('baseTableName') || 'high-velocity-table';

const environmentProps: EnvironmentProps = {
  env: { account: awsAccount, region: awsRegion },
  baseTableName,
  // DESTROY lets `cdk destroy` clean up everything, including stateful resources.
  removalPolicy: cdk.RemovalPolicy.DESTROY,
  user,
  vpcId,
  localIp,
  sshKeyName
};
new DaxBenchmarkingStack(app, `${user}-dax-benchmark-stack`, environmentProps);
+8
View File
@@ -0,0 +1,8 @@
// Jest configuration: run the TypeScript tests under test/ through ts-jest.
module.exports = {
  testEnvironment: 'node',
  roots: ['<rootDir>/test'],     // only discover tests inside test/
  testMatch: ['**/*.test.ts'],   // *.test.ts naming convention
  transform: {
    '^.+\\.tsx?$': 'ts-jest'     // compile TS/TSX on the fly
  }
};
+50
View File
@@ -0,0 +1,50 @@
import { Tags } from "aws-cdk-lib";
import { Construct } from "constructs";
import { EnvironmentProps } from "./types";
import { Instance, InstanceClass, InstanceSize, InstanceType, MachineImage, Peer, Port, SecurityGroup, SubnetType, Vpc } from "aws-cdk-lib/aws-ec2";
import { IRole, Role, ServicePrincipal } from "aws-cdk-lib/aws-iam";
/**
 * Bastion host construct: a small public EC2 instance that accepts SSH (22)
 * and Elasticsearch (9200) traffic from the user's public IP only, and that
 * exchanges all traffic with the DAX cluster via mutual security-group rules.
 */
export class DaxBastionHost extends Construct {
  /** Instance profile role; the stack later attaches the DAX/DynamoDB policy to it. */
  public readonly instanceRole: IRole;
  /** The bastion EC2 instance (source of the InstanceId / InstancePublicIp outputs). */
  public readonly instance: Instance;

  constructor(scope: Construct, id: string, environmentProps: EnvironmentProps, daxSecurityGroup: SecurityGroup) {
    super(scope, id);
    Tags.of(this).add('Application', 'dynamodb-dax-benchmarker');
    const { removalPolicy, user, vpcId, localIp, sshKeyName } = environmentProps;
    // Restrict ingress to the caller's machine only.
    const localIpCidr = `${localIp}/32`;
    const vpc = Vpc.fromLookup(this, 'Vpc', { vpcId });
    const bastionHostSecurityGroup = new SecurityGroup(this, `${user}-dax-sg`, {
      vpc,
      description: `Allow SSH, Elasticsearch, and DAX access for ${user}`,
      securityGroupName: `${user}-dax-bastion-host-sg`
    });
    bastionHostSecurityGroup.applyRemovalPolicy(removalPolicy);
    bastionHostSecurityGroup.addIngressRule(Peer.ipv4(localIpCidr), Port.tcp(22), "Allow SSH access to this instance from the users public IP");
    // Port 9200 serves the reverse SSH tunnel to the user's local Elasticsearch.
    bastionHostSecurityGroup.addIngressRule(Peer.ipv4(localIpCidr), Port.tcp(9200), "Allow the host to communicate with the users locally running Elasticsearch cluster");
    // Bastion and DAX cluster may talk to each other without restriction.
    bastionHostSecurityGroup.addIngressRule(daxSecurityGroup, Port.allTraffic());
    daxSecurityGroup.addIngressRule(bastionHostSecurityGroup, Port.allTraffic());
    this.instanceRole = new Role(this, `${user}-bastion-role`, {
      roleName: `${user}-bastion-role`,
      assumedBy: new ServicePrincipal('ec2.amazonaws.com'),
    });
    this.instanceRole.applyRemovalPolicy(removalPolicy);
    this.instance = new Instance(this, `${user}-dax-bastion-host`, {
      vpc,
      instanceType: InstanceType.of(InstanceClass.T2, InstanceSize.SMALL),
      machineImage: MachineImage.latestAmazonLinux2023(),
      instanceName: `${user}-dax-bastion-host`,
      keyName: sshKeyName,
      // Public subnet so the user's machine can reach it directly over SSH.
      vpcSubnets: vpc.selectSubnets({ subnetType: SubnetType.PUBLIC }),
      securityGroup: bastionHostSecurityGroup,
      role: this.instanceRole
    });
    this.instance.applyRemovalPolicy(removalPolicy);
  }
}
+89
View File
@@ -0,0 +1,89 @@
import { Construct } from "constructs";
import { EnvironmentProps } from "./types";
import { CfnOutput, Stack, Tags } from "aws-cdk-lib";
import { CfnCluster, CfnSubnetGroup } from "aws-cdk-lib/aws-dax";
import { Effect, PolicyDocument, PolicyStatement, Role, ServicePrincipal } from "aws-cdk-lib/aws-iam";
import { SecurityGroup, SubnetType, Vpc } from "aws-cdk-lib/aws-ec2";
import { DynamoDbBenchmarkTable } from "./dynamodb";
import { DaxBastionHost } from "./bastion-host";
/**
 * Stack that provisions the full DAX benchmarking environment: the target
 * DynamoDB table, a DAX cluster inside an existing VPC, the IAM service role
 * DAX assumes to reach the table, and a bastion host (via DaxBastionHost)
 * for running the benchmarker from inside the VPC.
 */
export class DaxBenchmarkingStack extends Stack {
  constructor(scope: Construct, id: string, environmentProps: EnvironmentProps) {
    super(scope, id, environmentProps);
    // Tag every resource in this stack for ownership/cost tracking.
    Tags.of(this).add('Application', 'dynamodb-dax-benchmarker');
    const { user, removalPolicy, vpcId } = environmentProps;
    // The DynamoDB table the DAX cluster fronts.
    const { table } = new DynamoDbBenchmarkTable(this, `${user}-dynamodb-benchmark-table`, environmentProps);
    // Deploy into a pre-existing VPC supplied by the caller via vpcId.
    const vpc = Vpc.fromLookup(this, 'Vpc', { vpcId });
    const daxSecurityGroup = new SecurityGroup(this, `${user}-dax-sg`, {
      vpc,
      securityGroupName: `${user}-dax-sg`
    });
    daxSecurityGroup.applyRemovalPolicy(removalPolicy);
    // The bastion host construct adds the ingress rules between itself and
    // the DAX security group (see DaxBastionHost).
    const { instanceRole, instance } = new DaxBastionHost(this, `${user}-dax-bastion-host`, environmentProps, daxSecurityGroup);
    const daxClusterName = `${user}-high-velocity`;
    // DynamoDB permissions DAX needs to serve reads/writes for the table.
    const daxFullAccessPolicy = new PolicyStatement({
      effect: Effect.ALLOW,
      actions: [
        "dynamodb:BatchGetItem",
        "dynamodb:GetItem",
        "dynamodb:Query",
        "dynamodb:Scan",
        "dynamodb:BatchWriteItem",
        "dynamodb:DeleteItem",
        "dynamodb:PutItem",
        "dynamodb:UpdateItem",
        "dynamodb:DescribeLimits",
        "dynamodb:DescribeTimeToLive",
        "dynamodb:DescribeTable",
        "dynamodb:ListTables"
      ],
      resources: [table.tableArn]
    });
    // Role assumed by the DAX service itself; the same policy is also granted
    // to the bastion instance role so the benchmarker can talk to the table.
    const daxServiceRole = new Role(this, `${daxClusterName}-role`, {
      assumedBy: new ServicePrincipal("dax.amazonaws.com"),
      inlinePolicies: {
        DAXFullAccess: new PolicyDocument({
          statements: [daxFullAccessPolicy]
        })
      }
    });
    daxServiceRole.applyRemovalPolicy(removalPolicy);
    instanceRole.addToPrincipalPolicy(daxFullAccessPolicy);
    // DAX has no L2 construct, so the subnet group and cluster are raw Cfn
    // resources; the cluster lives in the VPC's isolated subnets.
    const subnetGroup = new CfnSubnetGroup(this, `${user}-dax-subnet-group`, {
      subnetIds: vpc.selectSubnets({
        subnetType: SubnetType.PRIVATE_ISOLATED
      }).subnetIds,
      subnetGroupName: `${user}-dax-subnet-group`,
    });
    subnetGroup.applyRemovalPolicy(removalPolicy);
    const daxCluster = new CfnCluster(this, daxClusterName, {
      iamRoleArn: daxServiceRole.roleArn,
      nodeType: 'dax.r5.large',
      replicationFactor: 3,
      securityGroupIds: [daxSecurityGroup.securityGroupId],
      subnetGroupName: subnetGroup.subnetGroupName,
      availabilityZones: vpc.availabilityZones,
      clusterEndpointEncryptionType: 'TLS',
      clusterName: daxClusterName,
      sseSpecification: {
        sseEnabled: true,
      }
    });
    daxCluster.applyRemovalPolicy(removalPolicy);
    // Explicit dependency: Cfn resources don't infer ordering from the
    // subnetGroupName string reference alone.
    daxCluster.addDependency(subnetGroup);
    // Surface the values the benchmarker/SSH user needs after deploy.
    new CfnOutput(this, 'DaxEndpoint', { value: daxCluster.attrClusterDiscoveryEndpointUrl });
    new CfnOutput(this, 'InstanceId', { value: instance.instanceId });
    new CfnOutput(this, 'InstancePublicIp', { value: instance.instancePublicIp });
  }
}
+27
View File
@@ -0,0 +1,27 @@
import {Tags} from "aws-cdk-lib";
import {Construct} from "constructs";
import {EnvironmentProps} from "./types";
import {AttributeType, BillingMode, Table} from "aws-cdk-lib/aws-dynamodb";
/**
 * Provisions the on-demand DynamoDB table used as the benchmarking target.
 * The table is keyed solely by a string `id` partition key and named
 * `<user>-<baseTableName>` so multiple developers can share one account.
 */
export class DynamoDbBenchmarkTable extends Construct {
  public readonly table: Table;

  constructor(scope: Construct, id: string, environmentProps: EnvironmentProps) {
    super(scope, id);
    Tags.of(this).add('Application', 'dynamodb-dax-benchmarker');

    const { baseTableName, removalPolicy, user } = environmentProps;
    const tableName = `${user}-${baseTableName}`;

    this.table = new Table(this, tableName, {
      tableName,
      removalPolicy,
      billingMode: BillingMode.PAY_PER_REQUEST,
      partitionKey: { name: 'id', type: AttributeType.STRING },
    });
  }
}
+10
View File
@@ -0,0 +1,10 @@
import {RemovalPolicy, StackProps} from "aws-cdk-lib";
/** Props shared by every stack and construct in this CDK app. */
export interface EnvironmentProps extends StackProps {
  /** Base name of the benchmarking table; prefixed with `user` at deploy time. */
  readonly baseTableName: string
  /** Removal policy applied to every provisioned resource. */
  readonly removalPolicy: RemovalPolicy
  /** Developer identifier used to namespace resource names. */
  readonly user: string
  /** ID of the pre-existing VPC to deploy the DAX cluster and bastion into. */
  readonly vpcId: string
  // NOTE(review): consumers appear to use this as a CIDR (localIpCidr) for SSH
  // ingress — confirm whether callers pass "x.x.x.x/32" or a bare IP.
  readonly localIp: string
  /** Name of the EC2 key pair used for SSH access to the bastion host. */
  readonly sshKeyName: string
}
+40
View File
@@ -0,0 +1,40 @@
module github.com/Dark-Alex-17/dynamodb-benchmarker
go 1.20
require (
github.com/aws/aws-cdk-go/awscdk/v2 v2.88.0
github.com/aws/aws-dax-go v1.2.12
github.com/aws/aws-sdk-go v1.44.301
github.com/aws/constructs-go/constructs/v10 v10.2.69
github.com/aws/jsii-runtime-go v1.85.0
github.com/elastic/go-elasticsearch/v8 v8.8.2
github.com/google/uuid v1.3.0
github.com/sirupsen/logrus v1.9.3
gopkg.in/loremipsum.v1 v1.1.2
)
require (
github.com/Masterminds/semver/v3 v3.2.1 // indirect
github.com/cdklabs/awscdk-asset-awscli-go/awscliv1/v2 v2.2.200 // indirect
github.com/cdklabs/awscdk-asset-kubectl-go/kubectlv20/v2 v2.1.2 // indirect
github.com/cdklabs/awscdk-asset-node-proxy-agent-go/nodeproxyagentv5/v2 v2.0.165 // indirect
github.com/fatih/color v1.15.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/yuin/goldmark v1.4.13 // indirect
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect
golang.org/x/mod v0.12.0 // indirect
golang.org/x/tools v0.11.0 // indirect
)
require (
github.com/antlr/antlr4 v0.0.0-20181218183524-be58ebffde8e // indirect
github.com/elastic/elastic-transport-go/v8 v8.3.0 // indirect
github.com/gofrs/uuid v3.3.0+incompatible // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/spf13/cobra v1.7.0
golang.org/x/sys v0.10.0 // indirect
)
+245
View File
@@ -0,0 +1,245 @@
package main
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"math/rand"
"os"
"strconv"
"strings"
"time"
"github.com/Dark-Alex-17/dynamodb-benchmarker/pkg/models"
"github.com/Dark-Alex-17/dynamodb-benchmarker/pkg/simulators"
"github.com/Dark-Alex-17/dynamodb-benchmarker/pkg/utils"
"github.com/aws/aws-dax-go/dax"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/dynamodb"
"github.com/elastic/go-elasticsearch/v8"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)
var concurrentSimulations, buffer, attributes, duration int
var username, password, index, endpoint, tableName string
var readOnly bool
// main wires up the cobra root command, registers the CLI flags, and runs the
// benchmark via execute() after flag validation.
func main() {
	rootCmd := &cobra.Command{
		Use:   "dax-benchmarker",
		Short: "A CLI tool for simulating heavy usage against DAX and publishing metrics to an Elastic Stack for analysis",
		RunE: func(cmd *cobra.Command, args []string) error {
			// Fail fast on bad flags before spawning any goroutines.
			if err := validateFlags(); err != nil {
				return err
			}
			execute()
			return nil
		},
	}
	// All flags populate the package-level variables declared above; defaults
	// match the companion DynamoDB benchmarker's CLI.
	rootCmd.PersistentFlags().IntVarP(&concurrentSimulations, "concurrent-simulations", "c", 1000, "The number of concurrent simulations to run")
	rootCmd.PersistentFlags().IntVarP(&buffer, "buffer", "b", 500, "The buffer size of the Elasticsearch goroutine's channel")
	rootCmd.PersistentFlags().IntVarP(&attributes, "attributes", "a", 5, "The number of attributes to use when populating and querying the DynamoDB table; minimum value of 1")
	rootCmd.PersistentFlags().IntVarP(&duration, "duration", "d", 1800, "The length of time (in seconds) to run the benchmark for")
	rootCmd.PersistentFlags().StringVarP(&username, "username", "u", "elastic", "Local Elasticsearch cluster username")
	rootCmd.PersistentFlags().StringVarP(&password, "password", "p", "changeme", "Local Elasticsearch cluster password")
	rootCmd.PersistentFlags().StringVarP(&index, "index", "i", "dax", "The Elasticsearch Index to insert data into")
	rootCmd.PersistentFlags().StringVarP(&tableName, "table", "t", fmt.Sprintf("%s-high-velocity-table", os.Getenv("USER")), "The DynamoDB table to perform operations against")
	rootCmd.PersistentFlags().StringVarP(&endpoint, "endpoint", "e", "", "The DAX endpoint to hit when running simulations (assumes secure endpoint, so do not specify port)")
	rootCmd.PersistentFlags().BoolVarP(&readOnly, "read-only", "r", false, "Whether to run a read-only scenario for benchmarking")
	if err := rootCmd.Execute(); err != nil {
		log.Errorf("Something went wrong parsing CLI args and executing the client: %v", err)
	}
}
// validateFlags sanity-checks the CLI flags and required environment
// variables before any simulation work begins, resolving the DAX endpoint
// from DAX_ENDPOINT when the --endpoint flag is absent.
func validateFlags() error {
	if endpoint == "" {
		fromEnv := os.Getenv("DAX_ENDPOINT")
		if fromEnv == "" {
			return errors.New("a DAX endpoint must be specified either via -e, --endpoint or via the DAX_ENDPOINT environment variable")
		}
		endpoint = fromEnv
	}
	if attributes < 1 {
		return errors.New("the number of attributes cannot be lower than 1")
	}
	if os.Getenv("AWS_REGION") == "" {
		return errors.New("an AWS region must be specified using the AWS_REGION environment variable")
	}
	return nil
}
// execute builds the DAX client, starts the Elasticsearch publisher and the
// simulation goroutines, then blocks until the configured benchmark duration
// elapses. The goroutines are abandoned when the process exits.
func execute() {
	esChan := make(chan models.DynamoDbSimulationMetrics, buffer)
	// NOTE: the channel is deliberately NOT closed here. The simulation
	// goroutines keep sending until the process exits, so the original
	// `defer close(esChan)` could panic with "send on closed channel".
	daxEndpoint := fmt.Sprintf("%s:9111", endpoint)
	region := os.Getenv("AWS_REGION")
	sess := session.Must(session.NewSession(&aws.Config{
		Credentials: credentials.NewChainCredentials([]credentials.Provider{&credentials.EnvProvider{}}),
		Endpoint:    &daxEndpoint,
		Region:      &region,
	}))
	if _, err := sess.Config.Credentials.Get(); err != nil {
		log.Errorf("credentials were not loaded! %+v", err)
	}
	client, err := dax.NewWithSession(*sess)
	if err != nil {
		// Bail out: continuing with a nil client would panic on first use.
		log.Errorf("unable to initialize dax client %v", err)
		return
	}
	partitionKeys, err := scanAllPartitionKeys(client)
	if err != nil {
		log.Errorf("Unable to fetch partition keys! Simulation failed! %+v", err)
	}
	go startElasticsearchPublisher(esChan)
	for i := 0; i < concurrentSimulations; i++ {
		go simulationLoop(esChan, client, partitionKeys)
	}
	// Renamed so the time.Duration no longer shadows the package-level
	// `duration` flag (an int of seconds).
	benchmarkDuration, err := time.ParseDuration(strconv.Itoa(duration) + "s")
	if err != nil {
		log.Errorf("Unable to create duration from the provided time: %v", err)
		return
	}
	<-time.After(benchmarkDuration)
}
// startElasticsearchPublisher consumes metrics from c and indexes each
// document into the configured Elasticsearch index on localhost:9200. It runs
// until the channel is closed (in practice, for the life of the process) and
// uses the package-level username/password/index flags.
func startElasticsearchPublisher(c <-chan models.DynamoDbSimulationMetrics) {
	config := elasticsearch.Config{
		Addresses: []string{
			"http://localhost:9200",
		},
		Username: username,
		Password: password,
	}
	esClient, err := elasticsearch.NewClient(config)
	if err != nil {
		// Return instead of continuing: the original fell through and
		// dereferenced the nil client below.
		log.Errorf("unable to initialize elasticsearch client %v", err)
		return
	}
	// Explicit mapping so "timestamp" (epoch milliseconds) is indexed as a date.
	mapping := `{
		"properties": {
			"timestamp": {
				"type": "date"
			}
		}
	}`
	log.Infof("Setting the explicit mappings for the %s index", index)
	if _, err := esClient.Indices.Create(index); err != nil {
		log.Warnf("Unable to create the %s index. Encountered the following error: %v", index, err)
	}
	if _, err := esClient.Indices.PutMapping([]string{index}, strings.NewReader(mapping)); err != nil {
		log.Errorf("unable to create mapping for the %s index! %+v", index, err)
	}
	for metric := range c {
		log.Info("Publishing metrics to Elasticsearch...")
		data, err := json.Marshal(metric)
		if err != nil {
			// Previously this error was silently discarded.
			log.Errorf("Unable to serialize the metrics document: %v", err)
			continue
		}
		if _, err := esClient.Index(index, bytes.NewReader(data)); err != nil {
			log.Error("Was unable to publish metrics to Elasticsearch! Received a non 2XX response")
		} else {
			log.Info("Successfully published metrics to Elasticsearch")
		}
	}
}
// simulationLoop runs simulations forever, publishing one metrics document
// per iteration onto c. Intended to run as a goroutine; it never returns and
// is abandoned when the process exits.
func simulationLoop(c chan<- models.DynamoDbSimulationMetrics, client *dax.Dax, partitionKeys []string) {
	for {
		metrics := new(models.DynamoDbSimulationMetrics)
		metrics.Successful = true
		// Millisecond-precision epoch timestamp for the Elasticsearch "date"
		// mapping; UnixMilli replaces the manual UnixNano()/1e6 arithmetic.
		metrics.Timestamp = time.Now().UnixMilli()
		startTime := time.Now()
		if readOnly {
			log.Info("Running a read-only simulation...")
			metrics.Scenario = models.ScenarioReadOnly.String()
			runReadOnlySimulation(client, metrics, partitionKeys)
		} else {
			log.Info("Running a CRUD simulation...")
			metrics.Scenario = models.ScenarioCrud.String()
			runCrudSimulation(client, metrics, partitionKeys)
		}
		log.Info("Simulation completed successfully!")
		// Wall-clock time of the whole simulation, in milliseconds.
		duration := time.Since(startTime).Microseconds()
		millisecondDuration := float64(duration) / 1000
		metrics.SimulationTime = &millisecondDuration
		// %+v — the original "%v+" printed a stray literal '+'.
		log.Infof("Metrics: %+v", metrics)
		c <- *metrics
	}
}
// runReadOnlySimulation sleeps a small random jitter (0-15 ms) so the
// concurrent simulations don't hit DAX in lock-step, then performs a single
// read operation.
func runReadOnlySimulation(client *dax.Dax, metrics *models.DynamoDbSimulationMetrics, partitionKeys []string) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	// The original slept for r.Intn(16) *nanoseconds* — effectively a no-op;
	// milliseconds was almost certainly the intent of the jitter.
	time.Sleep(time.Duration(r.Intn(16)) * time.Millisecond)
	metrics.Operation = models.DynamoRead.String()
	simulators.SimulateReadOperation(client, tableName, partitionKeys, metrics)
}
// runCrudSimulation picks one of read/write/update uniformly at random and
// delegates to the matching simulator, tagging metrics with the operation.
func runCrudSimulation(client *dax.Dax, metrics *models.DynamoDbSimulationMetrics, partitionKeys []string) {
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	op := models.DynamoOperation(rng.Intn(3))
	log.Infof("Operation number: %d", op)
	metrics.Operation = op.String()
	switch op {
	case models.DynamoRead:
		simulators.SimulateReadOperation(client, tableName, partitionKeys, metrics)
	case models.DynamoWrite:
		simulators.SimulateWriteOperation(client, tableName, attributes, metrics)
	case models.DynamoUpdate:
		simulators.SimulateUpdateOperation(client, tableName, attributes, metrics)
	}
}
// scanAllPartitionKeys fetches up to 10,000 partition keys ("id" attribute)
// from the benchmarking table so the read simulations can target existing
// items. On error it returns an empty slice alongside the error.
func scanAllPartitionKeys(client *dax.Dax) ([]string, error) {
	log.Info("Fetching a large list of partition keys to randomly read...")
	projectionExpression := "id"
	var limit int64 = 10000
	response, err := client.Scan(&dynamodb.ScanInput{
		TableName:            &tableName,
		Limit:                &limit,
		ProjectionExpression: &projectionExpression,
	})
	if err != nil {
		log.Errorf("Unable to fetch partition keys! %v", err)
		return []string{}, err
	}
	log.Info("Fetched partition keys!")
	// make([]string, 0, n): the original used make([]string, 100), which
	// prepended 100 empty strings that downstream readers had to skip over.
	keys := make([]string, 0, len(response.Items))
	for _, itemsMap := range response.Items {
		keys = append(keys, *utils.MapValues(itemsMap)[0].S)
	}
	log.Infof("Found a total of %d keys", len(keys))
	return keys, nil
}
+89
View File
@@ -0,0 +1,89 @@
package models
import (
"fmt"
"math/rand"
"strconv"
"time"
"github.com/aws/aws-sdk-go/service/dynamodb"
"github.com/google/uuid"
"gopkg.in/loremipsum.v1"
)
// DynamoOperation enumerates the DynamoDB operation types a simulation can
// perform.
type DynamoOperation int

const (
	DynamoRead DynamoOperation = iota
	DynamoWrite
	DynamoUpdate
)

// String renders the operation as the lowercase name used in published
// metrics documents; unknown values fall back to "read".
func (d DynamoOperation) String() string {
	switch d {
	case DynamoWrite:
		return "write"
	case DynamoUpdate:
		return "update"
	default:
		// Covers DynamoRead and any out-of-range value.
		return "read"
	}
}
// Scenario enumerates the benchmarking scenarios the CLI can run.
type Scenario int

const (
	ScenarioCrud Scenario = iota
	ScenarioReadOnly
)

// String renders the scenario as the camelCase name used in published
// metrics documents; unknown values fall back to "crud".
func (s Scenario) String() string {
	if s == ScenarioReadOnly {
		return "readOnly"
	}
	return "crud"
}
// BenchmarkingItem is a randomly generated DynamoDB item keyed by a UUID
// "id" attribute.
type BenchmarkingItem map[string]*dynamodb.AttributeValue

// NewBenchmarkingItem builds an item with the given number of generated
// attributes (named "0", "1", ...): odd indexes get a random number in
// [0, 32) with two decimals, even indexes get a lorem-ipsum sentence.
func NewBenchmarkingItem(attributes int) BenchmarkingItem {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	generator := loremipsum.NewWithSeed(time.Now().UnixNano())
	id := uuid.New().String()
	item := BenchmarkingItem{"id": {S: &id}}
	for i := 0; i < attributes; i++ {
		if i%2 == 1 {
			number := fmt.Sprintf("%.2f", r.Float64()*32.00)
			item[strconv.Itoa(i)] = &dynamodb.AttributeValue{N: &number}
		} else {
			sentence := generator.Sentence()
			item[strconv.Itoa(i)] = &dynamodb.AttributeValue{S: &sentence}
		}
	}
	return item
}
type DynamoDbSimulationMetrics struct {
Operation string `json:"operation"`
Timestamp int64 `json:"timestamp"`
Successful bool `json:"successful"`
Scenario string `json:"scenario"`
SimulationTime *float64 `json:"simulationTime,omitempty"`
ReadTime *float64 `json:"readTime,omitempty"`
WriteTime *float64 `json:"writeTime,omitempty"`
WriteItemConfirmationTime *float64 `json:"writeItemConfirmationTime,omitempty"`
UpdateTime *float64 `json:"updateItem,omitempty"`
UpdateItemConfirmationTime *float64 `json:"updateItemConfirmationTime,omitempty"`
DeleteTime *float64 `json:"deleteTime,omitempty"`
DeleteItemConfirmationTime *float64 `json:"deleteItemConfirmationTime,omitempty"`
}
+110
View File
@@ -0,0 +1,110 @@
package simulators
import (
"time"
"github.com/Dark-Alex-17/dynamodb-benchmarker/pkg/models"
"github.com/aws/aws-dax-go/dax"
"github.com/aws/aws-sdk-go/service/dynamodb"
log "github.com/sirupsen/logrus"
)
// ReadItem fetches a single item by its "id" partition key. When
// recordMetrics is true the GetItem round-trip time is recorded on metrics.
// A missing item is not an error: an empty GetItemOutput and nil are returned.
func ReadItem(client *dax.Dax, tableName string, id dynamodb.AttributeValue, metrics *models.DynamoDbSimulationMetrics, recordMetrics bool) (dynamodb.GetItemOutput, error) {
	partitionKey := *id.S
	startTime := time.Now()
	response, err := client.GetItem(&dynamodb.GetItemInput{
		TableName: &tableName,
		Key: map[string]*dynamodb.AttributeValue{
			"id": {S: id.S},
		},
	})
	if recordMetrics {
		duration := time.Since(startTime).Microseconds()
		millisecondDuration := float64(duration) / 1000
		metrics.ReadTime = &millisecondDuration
	}
	if err != nil {
		// %+v — the original "%v+" printed a stray literal '+'.
		log.Errorf("Could not fetch item with partition key: %v. %+v", partitionKey, err)
		metrics.Successful = false
		return dynamodb.GetItemOutput{}, err
	}
	if len(response.Item) == 0 {
		log.Infof("No items found with partition key: %v", partitionKey)
		return dynamodb.GetItemOutput{}, nil
	}
	return *response, nil
}
// UpdateItem overwrites the item identified by id with freshly generated
// attribute values (a full PutItem, not a DynamoDB UpdateItem call) and
// records the round-trip time on metrics.
func UpdateItem(client *dax.Dax, tableName string, id dynamodb.AttributeValue, attributes int, metrics *models.DynamoDbSimulationMetrics) {
	updatedItem := models.NewBenchmarkingItem(attributes)
	// Keep the original key so the put replaces the existing item.
	updatedItem["id"] = &id
	partitionKey := *id.S
	startTime := time.Now()
	_, err := client.PutItem(&dynamodb.PutItemInput{
		TableName: &tableName,
		Item:      updatedItem,
	})
	duration := time.Since(startTime).Microseconds()
	millisecondDuration := float64(duration) / 1000
	metrics.UpdateTime = &millisecondDuration
	if err != nil {
		// %+v — the original "%v+" printed a stray literal '+'.
		log.Errorf("Could not update item with partition key: %v. %+v", partitionKey, err)
		metrics.Successful = false
	} else {
		log.Infof("Successfully updated item with partition key: %v", partitionKey)
	}
}
// PutItem writes a brand-new randomly generated item to the table, records
// the round-trip time on metrics, and returns the item so callers can verify
// and later delete it.
func PutItem(client *dax.Dax, tableName string, attributes int, metrics *models.DynamoDbSimulationMetrics) (models.BenchmarkingItem, error) {
	newItem := models.NewBenchmarkingItem(attributes)
	partitionKey := *newItem["id"].S
	startTime := time.Now()
	_, err := client.PutItem(&dynamodb.PutItemInput{
		TableName: &tableName,
		Item:      newItem,
	})
	duration := time.Since(startTime).Microseconds()
	millisecondDuration := float64(duration) / 1000
	metrics.WriteTime = &millisecondDuration
	if err != nil {
		// %+v — the original "%v+" printed a stray literal '+'.
		log.Errorf("Could not put new item with partition key: %v. %+v", partitionKey, err)
		metrics.Successful = false
		return models.BenchmarkingItem{}, err
	}
	log.Infof("Successfully put new item with partition key: %v", partitionKey)
	return newItem, nil
}
// DeleteItem removes the item with the given "id" partition key and records
// the DeleteItem round-trip time on metrics.
func DeleteItem(client *dax.Dax, tableName string, id dynamodb.AttributeValue, metrics *models.DynamoDbSimulationMetrics) {
	partitionKey := *id.S
	startTime := time.Now()
	_, err := client.DeleteItem(&dynamodb.DeleteItemInput{
		TableName: &tableName,
		Key: map[string]*dynamodb.AttributeValue{
			// Fixed: the key name must match the table's partition key "id"
			// (as used by ReadItem and the table definition); the original
			// "charger_id" made every delete fail validation.
			"id": &id,
		},
	})
	duration := time.Since(startTime).Microseconds()
	millisecondDuration := float64(duration) / 1000
	metrics.DeleteTime = &millisecondDuration
	if err != nil {
		// %+v — the original "%v+" printed a stray literal '+'.
		log.Errorf("Could not delete item with partition key: %v. %+v", partitionKey, err)
		metrics.Successful = false
	} else {
		log.Infof("Successfully deleted item with partition key: %v", partitionKey)
	}
}
+111
View File
@@ -0,0 +1,111 @@
package simulators
import (
"math/rand"
"strings"
"time"
"github.com/Dark-Alex-17/dynamodb-benchmarker/pkg/models"
"github.com/aws/aws-dax-go/dax"
"github.com/aws/aws-sdk-go/service/dynamodb"
log "github.com/sirupsen/logrus"
)
// SimulateReadOperation reads one randomly chosen existing item, retrying the
// GetItem up to 10 times before marking the simulation as failed.
func SimulateReadOperation(client *dax.Dax, tableName string, partitionKeys []string, metrics *models.DynamoDbSimulationMetrics) {
	log.Info("Performing READ operation...")
	// Guard: rand.Intn panics when its argument is 0, and the selection loop
	// below would never terminate with an empty key list.
	if len(partitionKeys) == 0 {
		log.Error("No partition keys available to read! Marking the simulation as failed.")
		metrics.Successful = false
		return
	}
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	var partitionKey string
	for {
		partitionKey = partitionKeys[r.Intn(len(partitionKeys))]
		if len(strings.TrimSpace(partitionKey)) == 0 {
			log.Info("Partition key was empty. Trying again to choose a non-empty partition key")
		} else {
			break
		}
	}
	id := dynamodb.AttributeValue{S: &partitionKey}
	for i := 0; i < 10; i++ {
		log.Infof("Attempt %d: Fetching existing item with partition key: %v", i, partitionKey)
		response, _ := ReadItem(client, tableName, id, metrics, true)
		if response.Item["id"] != nil {
			log.Infof("Successfully read existing item with partition key: %v", partitionKey)
			break
		}
		log.Errorf("Unable to find existing item with partition key: %v", partitionKey)
		if i == 9 {
			log.Errorf("All attempts to fetch the existing item with partition key: %v failed!", partitionKey)
			metrics.Successful = false
		}
	}
}
// SimulateWriteOperation exercises the full write lifecycle: put a new random
// item, confirm it is readable, delete it, then confirm the delete landed.
func SimulateWriteOperation(client *dax.Dax, tableName string, attributes int, metrics *models.DynamoDbSimulationMetrics) {
	log.Info("Performing WRITE operation...")
	newItem, putErr := PutItem(client, tableName, attributes, metrics)
	if putErr != nil {
		log.Errorf("Unable to complete PUT simulation. %v+", putErr)
		metrics.Successful = false
		return
	}
	itemId := *newItem["id"]
	AssertItemWasCreated(client, tableName, itemId, metrics)
	DeleteItem(client, tableName, itemId, metrics)
	AssertItemWasDeleted(client, tableName, itemId, metrics)
}
// SimulateUpdateOperation exercises the update lifecycle: put a fresh item,
// confirm it exists, overwrite it, poll until the overwrite is observable,
// then delete it and confirm the delete.
func SimulateUpdateOperation(client *dax.Dax, tableName string, attributes int, metrics *models.DynamoDbSimulationMetrics) {
	log.Info("Performing UPDATE operation...")
	newItem, err := PutItem(client, tableName, attributes, metrics)
	if err != nil {
		log.Errorf("Unable to complete UPDATE simulation. %+v", err)
		metrics.Successful = false
		return
	}
	id := *newItem["id"]
	partitionKey := *id.S
	attemptsExhausted := false
	AssertItemWasCreated(client, tableName, id, metrics)
	UpdateItem(client, tableName, id, attributes, metrics)
	startTime := time.Now()
	for i := 0; i < 10; i++ {
		log.Infof("Attempt %d: Fetching updated item for partition key: %v...", i, partitionKey)
		updatedItem, err := ReadItem(client, tableName, id, metrics, false)
		if err != nil {
			log.Errorf("Unable to complete UPDATE simulation. %+v", err)
			metrics.Successful = false
			return
		}
		// Nil-safe comparison of attribute "1": it only exists when
		// attributes >= 2, and the re-read item may be empty while the write
		// propagates. The original dereferenced both pointers unconditionally
		// and panicked in either case.
		before, after := newItem["1"], updatedItem.Item["1"]
		if before != nil && before.N != nil && after != nil && after.N != nil && *before.N != *after.N {
			log.Infof("Confirmed update for partition key: %v", partitionKey)
			break
		}
		log.Errorf("Update for partition key %v failed! Values are still equal!", partitionKey)
		if i == 9 {
			log.Error("Exhausted attempts to fetch updated item!")
			metrics.Successful = false
			attemptsExhausted = true
		}
	}
	if !attemptsExhausted {
		duration := time.Since(startTime).Microseconds()
		millisecondDuration := float64(duration) / 1000
		metrics.UpdateItemConfirmationTime = &millisecondDuration
	}
	DeleteItem(client, tableName, id, metrics)
	AssertItemWasDeleted(client, tableName, id, metrics)
}
+69
View File
@@ -0,0 +1,69 @@
package simulators
import (
"time"
"github.com/Dark-Alex-17/dynamodb-benchmarker/pkg/models"
"github.com/aws/aws-dax-go/dax"
"github.com/aws/aws-sdk-go/service/dynamodb"
log "github.com/sirupsen/logrus"
)
// AssertItemWasCreated polls (up to 10 attempts) until the item with the
// given id is readable, recording the elapsed confirmation time on metrics;
// after 10 misses the simulation is marked failed.
func AssertItemWasCreated(client *dax.Dax, tableName string, id dynamodb.AttributeValue, metrics *models.DynamoDbSimulationMetrics) {
	partitionKey := *id.S
	attemptsExhausted := false
	startTime := time.Now()
	for i := 0; i < 10; i++ {
		log.Infof("Attempt %d: Fetching newly added item with partition key: %v", i, partitionKey)
		newItem, err := ReadItem(client, tableName, id, metrics, false)
		// Guard the map lookup: when the item isn't found, Item["id"] is a
		// nil *AttributeValue and the original's unguarded .S access panicked.
		if err != nil || newItem.Item["id"] == nil || newItem.Item["id"].S == nil {
			log.Errorf("Unable to find new item with partition key: %v", partitionKey)
			if i == 9 {
				log.Errorf("All attempts to fetch the newly added item with partition key: %v failed!", partitionKey)
				attemptsExhausted = true
				metrics.Successful = false
			}
		} else {
			log.Infof("Successfully read new item with partition key: %v", partitionKey)
			break
		}
	}
	if !attemptsExhausted {
		duration := time.Since(startTime).Microseconds()
		millisecondDuration := float64(duration) / 1000
		metrics.WriteItemConfirmationTime = &millisecondDuration
	}
}
// AssertItemWasDeleted polls (up to 10 attempts) until a read for the given
// id comes back empty, recording the elapsed confirmation time on metrics;
// after 10 hits the simulation is marked failed.
func AssertItemWasDeleted(client *dax.Dax, tableName string, id dynamodb.AttributeValue, metrics *models.DynamoDbSimulationMetrics) {
	partitionKey := *id.S
	attemptsExhausted := false
	startTime := time.Now()
	for i := 0; i < 10; i++ {
		log.Infof("Attempt %d: Fetching deleted item with partition key: %v ...", i, partitionKey)
		deletedItem, _ := ReadItem(client, tableName, id, metrics, false)
		// Guard the map lookup: once deleted, Item["id"] is a nil
		// *AttributeValue, so the original's deletedItem.Item["id"].S
		// dereferenced nil on the SUCCESS path.
		if item := deletedItem.Item["id"]; item == nil || item.S == nil {
			log.Infof("Item with partition key: %v was successfully deleted.", partitionKey)
			break
		} else {
			log.Errorf("Item with partition key %v was not deleted as expected!", partitionKey)
			if i == 9 {
				log.Errorf("All attempts to receive an empty response to verify item with partition key: %v was deleted failed!", partitionKey)
				attemptsExhausted = true
				metrics.Successful = false
			}
		}
	}
	if !attemptsExhausted {
		duration := time.Since(startTime).Microseconds()
		millisecondDuration := float64(duration) / 1000
		metrics.DeleteItemConfirmationTime = &millisecondDuration
	}
}
+11
View File
@@ -0,0 +1,11 @@
package utils
// MapValues collects the values of inputMap into a slice. Iteration order is
// Go's map order and therefore unspecified.
func MapValues[K comparable, V any](inputMap map[K]V) []V {
	values := make([]V, 0, len(inputMap))
	for _, value := range inputMap {
		values = append(values, value)
	}
	return values
}
+10
View File
@@ -0,0 +1,10 @@
tab_spaces=2
edition = "2021"
reorder_imports = true
imports_granularity = "Crate"
group_imports = "StdExternalCrate"
reorder_modules = true
merge_derives = true
use_field_init_shorthand = true
format_macro_matchers = true
format_macro_bodies = true
Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

+41
View File
@@ -0,0 +1,41 @@
#!/bin/bash
# Shared logging helpers: colorized ERROR/WARN/INFO output with optional
# mirroring to a log file. Meant to be sourced by the other scripts.

# Log file path; overridable via the first argument when sourcing.
BENCHMARK_LOG_FILE="$1"
if [[ -z $BENCHMARK_LOG_FILE ]]; then
  BENCHMARK_LOG_FILE=/tmp/benchmarker.log
fi

# Terminal color/style escape sequences used by the log-* functions.
red=$(tput setaf 1)
green=$(tput setaf 2)
gold=$(tput setaf 3)
blue=$(tput setaf 4)
magenta=$(tput setaf 5)
cyan=$(tput setaf 6)
default=$(tput sgr0)
bold=$(tput bold)
# log-error MESSAGE [ALSO_LOG]
# Prints MESSAGE as a red ERROR line; any non-empty second argument also
# appends the same line to $BENCHMARK_LOG_FILE.
log-error() {
  local line="${red}${bold}ERROR:${default}${red} $1${default}"
  echo -e "$line"
  if [[ -n $2 ]]; then
    echo -e "$line" >> "$BENCHMARK_LOG_FILE"
  fi
}
# log-warn MESSAGE [ALSO_LOG]
# Prints MESSAGE as a gold WARN line; any non-empty second argument also
# appends the same line to $BENCHMARK_LOG_FILE.
log-warn() {
  local line="${gold}${bold}WARN:${default}${gold} $1${default}"
  echo -e "$line"
  if [[ -n $2 ]]; then
    echo -e "$line" >> "$BENCHMARK_LOG_FILE"
  fi
}
# log-info MESSAGE [ALSO_LOG]
# Prints MESSAGE as a cyan INFO line; any non-empty second argument also
# appends the same line to $BENCHMARK_LOG_FILE.
log-info() {
  local line="${cyan}${bold}INFO:${default}${cyan} $1${default}"
  echo -e "$line"
  if [[ -n $2 ]]; then
    echo -e "$line" >> "$BENCHMARK_LOG_FILE"
  fi
}
+180
View File
@@ -0,0 +1,180 @@
#!/bin/bash
# randomly-generate-high-velocity-data: populates the benchmarking DynamoDB
# table with random items, 25 at a time via batch-write-item.

# Locate logger.sh whether invoked from scripts/ or the repo root.
if [[ $(basename "$(pwd)") == scripts ]]; then
  source logger.sh
else
  source scripts/logger.sh
fi
# Exit cleanly on Ctrl-C instead of dying mid-batch.
trap 'echo Stopping...; exit' SIGINT
# Prints the colorized help screen. The heredoc body is user-facing output
# and must stay exactly as-is.
usage() {
  cat << EOF
${blue}${bold}randomly-generate-high-velocity-data${default}: A script to randomly generate high-velocity data for some DynamoDB table with random attributes and values for benchmarking purposes.
${gold}${bold}USAGE:${default}
randomly-generate-high-velocity-data [OPTIONS] [ARGS]...
${green}-h, --help${default} Show this usage screen
${gold}${bold}ARGS:${default}
${green}-a, --attributes ${magenta}<ATTRIBUTES>${default} The number of attributes to populate each item in the table with
This defaults to 5
${green}-i, --items ${magenta}<ITEMS>${default} The number of items to populate the table with
${bold}Note:${default} Items are populated 25 at a time, so whatever number you provide will be rounded to the nearest multiple of 25
${green}-t, --table ${magenta}<TABLE_NAME>${default} The name of the DynamoDB table to populate
This defaults to $USER-high-velocity-table
EOF
}
# Aborts the script when any environment variable it depends on (or the
# required -i/--items argument) is unset or empty.
ensure-required-variables-are-set() {
  declare required_variables=(AWS_PROFILE AWS_REGION ITEMS)
  for variable in "${required_variables[@]}"; do
    if [[ -z "${!variable}" ]]; then
      # Fixed word order: the original read "required variable environment".
      log-error "A required environment variable is not initialized: $variable"
      exit 1
    fi
  done
}
# Parses CLI options via getopt into the global ATTRIBUTES/ITEMS/TABLE_NAME
# variables, applies defaults, validates them, and verifies AWS CLI login.
parse-arguments() {
  declare parsed_args
  parsed_args=$(getopt -a -n randomly-generate-high-velocity-data -o :a:hi:t: --long attributes:,help,items:,table: -- "$@")
  declare valid_arguments=$?
  if [[ $valid_arguments != 0 ]]; then
    log-error "Invalid arguments passed. See usage below."
    usage
    exit 1
  fi
  # Re-set the positional parameters to getopt's normalized output.
  eval set -- "$parsed_args"
  while :; do
    case "$1" in
      "-a" | "--attributes")
        ATTRIBUTES="$2"
        shift 2
        ;;
      "-h" | "--help")
        usage
        exit
        ;;
      "-i" | "--items")
        ITEMS="$2"
        shift 2
        ;;
      "-t" | "--table")
        TABLE_NAME="$2"
        shift 2
        ;;
      --)
        # End-of-options marker emitted by getopt.
        shift
        break
        ;;
      *)
        log-error "An invalid option was passed, but somehow getopt didn't catch it: $1. Displaying usage and exiting..."
        usage
        exit 1
        ;;
    esac
  done
  # Defaults for the optional arguments.
  if [[ -z $TABLE_NAME ]]; then
    TABLE_NAME="$USER-high-velocity-table"
  fi
  if [[ -z $ATTRIBUTES ]]; then
    ATTRIBUTES=5
  fi
  ensure-required-variables-are-set
  if [[ $ATTRIBUTES -lt 1 ]]; then
    log-error "ATTRIBUTES must be a value of at least 1 so that attributes can be added to the table."
    exit 1
  fi
  # Cheap no-op call to confirm the caller has valid AWS credentials.
  if ! (aws sts get-caller-identity > /dev/null 2>&1); then
    log-error "You must be logged into the AWS CLI in order to use this script. Please log into the AWS CLI first and then try again."
    exit 1
  fi
}
# Echoes the effective run settings. Heredoc content is user-facing output.
show-properties() {
  log-info "Using the following settings to randomly populate the DynamoDB benchmarking table:"
  cat <<-EOF
${cyan}
ATTRIBUTES=$ATTRIBUTES
TABLE_NAME=$TABLE_NAME
${default}
EOF
}
# Emits one JSON attribute fragment for attribute index $1: odd indexes get a
# random number ("N") in [0, 32], even indexes a random alphabetic string ("S").
generate-attribute-value() {
  declare current_val=$1
  case "$((current_val % 2))" in
    "1")
      echo '"'"$current_val"'": {"N": "'"$(seq 0 .01 32 | shuf | head -1)"'"}'
      ;;
    *)
      # Random printable text derived from /dev/urandom with non-letters
      # replaced by spaces.
      echo '"'"$current_val"'": {"S": "'"$(base64 /dev/urandom | awk '{print(0==NR%100)?"":$1}' | sed 's/[^[:alpha:]]/ /g' | head -1)"'"}'
      ;;
  esac
}
# Emits one DynamoDB PutRequest JSON object with a random UUID "id" and
# $ATTRIBUTES generated attributes (indexes 0..ATTRIBUTES-1).
generate-put-request() {
  declare attribute_values
  attribute_values=$(generate-attribute-value 0)
  for j in $(seq 1 $((ATTRIBUTES-1))); do
    attribute_values="$attribute_values, $(generate-attribute-value "$j")"
  done
  cat <<-EOF
{
"PutRequest": {
"Item": {
"id": {"S": "$(cat /proc/sys/kernel/random/uuid)"},
$attribute_values
}
}
}
EOF
}
# Builds the JSON payload for one `aws dynamodb batch-write-item` call:
# exactly 25 PutRequest entries (the batch-write maximum) for $TABLE_NAME.
generate-batch-json() {
  declare batch_request
  batch_request="{ \"$TABLE_NAME\": [ $(generate-put-request)"
  for _ in $(seq 1 24); do
    batch_request+=", $(generate-put-request)"
  done
  batch_request+=" ]}"
  echo "$batch_request"
}
# --- main ---
if ! (command -v aws > /dev/null 2>&1); then
  log-error "The AWS CLI must be installed first. Install the CLI first and then try again."
  exit 1
fi
parse-arguments "$@"
show-properties
# Write 25-item batches until at least $ITEMS items have been written.
# The unused `i` counter was removed; `((i++))` also returned exit status 1 on
# its first evaluation, which would abort the script under `set -e`.
declare -i items_written=0
while [[ $items_written -lt $ITEMS ]]; do
  log-info "Writing 25 entries to DynamoDB..."
  aws dynamodb batch-write-item --request-items "$(generate-batch-json)"
  log-info 'Entries Written!'
  ((items_written+=25))
  log-info "Total entries written: $items_written"
  log-info "Sleeping for 2 seconds to avoid the partition throughput limits..."
  sleep 2
done
+36
View File
@@ -0,0 +1,36 @@
#!/bin/bash
# Shared whiptail/dialog TUI helpers. Box dimensions are derived from the
# current terminal size: 50% of the height, 75% of the width.
TERMINAL_HEIGHT=$(tput lines)
BOX_HEIGHT=$(printf "%.0f" "$(echo "scale=2; $TERMINAL_HEIGHT * .5" | bc)")
TERMINAL_WIDTH=$(tput cols)
BOX_WIDTH=$(printf "%.0f" "$(echo "scale=2; $TERMINAL_WIDTH * .75" | bc)")
# Shows a modal message box containing $1, sized to the shared box dimensions.
msg-box() {
  whiptail --fb --msgbox "$1" "$BOX_HEIGHT" "$BOX_WIDTH"
}
# Prompts for the sudo password (prefixed with message $1) and caches it in
# the global $PASSWORD so the user is only asked once per run.
# NOTE(review): log-info comes from logger.sh — confirm this script sources it
# before these helpers are called.
check-sudo-pass() {
  log-info "Prompting user for sudo password with message: $1"
  if [[ ! "$PASSWORD" ]]; then
    # The 3>&2 2>&1 1>&3 swap captures whiptail's stderr output into $PASSWORD
    # while keeping the dialog itself on the terminal.
    PASSWORD=$(whiptail --fb --passwordbox "$1 Enter your sudo password" "$BOX_HEIGHT" "$BOX_WIDTH" 3>&2 2>&1 1>&3)
  fi
}
# show-tail-box TITLE PID FILE
# Tails FILE in a dialog box titled TITLE while process PID is alive; the
# trap kills PID if the user exits early, and is cleared afterwards.
show-tail-box() {
  trap "kill $2 2> /dev/null" EXIT
  # Re-open the tailbox whenever the user dismisses it but PID still runs.
  while kill -0 "$2" 2> /dev/null; do
    dialog --title "$1" --exit-label "Finished" --tailbox "$3" "$BOX_HEIGHT" "$BOX_WIDTH"
  done
  clear
  trap - EXIT
}
# Asks the user to confirm action $1; returns whiptail's exit status
# (0 = yes, non-zero = no), defaulting to "no".
prompt-yes-no() {
  declare action="$1"
  log-info "Prompting user if they wish to proceed with $action"
  whiptail --fb --title "$action?" --yesno "Are you sure you wish to proceed with the specified action: $action?" --defaultno "$BOX_HEIGHT" "$BOX_WIDTH"
}
+315
View File
@@ -0,0 +1,315 @@
use std::{env, time::Duration};
use anyhow::anyhow;
use aws_sdk_dynamodb::Client;
use chrono::Utc;
use clap::Parser;
use elasticsearch::{
auth::Credentials,
http::{
transport::{SingleNodeConnectionPool, TransportBuilder},
Url,
},
indices::IndicesPutMappingParts,
Elasticsearch,
};
use log::{error, info, warn, LevelFilter};
use log4rs::{
append::console::ConsoleAppender,
config::{Appender, Root},
encode::pattern::PatternEncoder,
};
use models::{DynamoDbSimulationMetrics, DynamoOperation};
use rand::{
rngs::{OsRng, StdRng},
Rng, SeedableRng,
};
use serde_json::json;
use tokio::{
select,
sync::mpsc::{self, Receiver, Sender},
task::JoinHandle,
};
use tokio_util::sync::CancellationToken;
use crate::{models::Scenario, simulators::Simulator};
mod models;
mod simulators;
mod timer_utils;
/// CLI arguments for the DynamoDB benchmarker, parsed by clap.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// The number of concurrent simulations to run
    #[arg(short, long, default_value_t = 1000)]
    concurrent_simulations: u32,
    /// The number of attributes to use when populating and querying the DynamoDB table; minimum value of 1
    #[arg(short, long, default_value_t = 5)]
    attributes: u32,
    /// The length of time (in seconds) to run the benchmark for
    #[arg(short, long, default_value_t = 1800)]
    duration: u64,
    /// The buffer size of the Elasticsearch thread's MPSC channel
    #[arg(short, long, default_value_t = 500)]
    buffer: usize,
    /// Local Elasticsearch cluster username
    #[arg(short, long, default_value_t = String::from("elastic"))]
    username: String,
    /// Local Elasticsearch cluster password
    #[arg(short, long, default_value_t = String::from("changeme"))]
    password: String,
    /// The Elasticsearch Index to insert data into
    #[arg(short, long, default_value_t = String::from("dynamodb"))]
    index: String,
    /// The DynamoDB table to perform operations against
    // unwrap_or_default instead of unwrap: the original panicked before clap
    // could even print --help when $USER was unset (e.g. in containers/CI).
    #[arg(short, long, default_value_t = format!("{}-high-velocity-table", env::var("USER").unwrap_or_default()))]
    table_name: String,
    /// Whether to run a read-only scenario for benchmarking
    #[arg(short, long)]
    read_only: bool,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();
    log4rs::init_config(init_logging_config())?;
    // Cancelled by the timer task below; every simulation task races this
    // token in a select! to shut down.
    let cancellation_token = CancellationToken::new();
    // Channel carrying metrics from the simulation tasks to the publisher.
    let (es_tx, es_rx) = mpsc::channel::<DynamoDbSimulationMetrics>(cli.buffer);
    // The publisher runs on a dedicated OS thread with its own Tokio runtime
    // (start_elasticsearch_publisher is itself annotated #[tokio::main]).
    std::thread::spawn(move || {
        start_elasticsearch_publisher(es_rx, cli.username, cli.password, cli.index)
    });
    let handles: Vec<JoinHandle<_>> = (0..cli.concurrent_simulations)
        .map(|_| {
            let tx = es_tx.clone();
            let token = cancellation_token.clone();
            let table_name = cli.table_name.clone();
            tokio::spawn(async move {
                // NOTE(review): the AWS config and client are constructed once
                // per task (i.e. cli.concurrent_simulations times) — consider
                // loading once and sharing if startup cost matters.
                let config = aws_config::load_from_env().await;
                let dynamodb_client = Client::new(&config);
                match scan_all_partition_keys(&dynamodb_client, table_name.clone()).await {
                    Ok(partition_keys_vec) => {
                        let simulator = Simulator::new(
                            &dynamodb_client,
                            table_name.clone(),
                            cli.attributes,
                            &partition_keys_vec,
                        );
                        // Run simulations until the shared token is cancelled;
                        // simulation_loop itself never returns.
                        select! {
                            _ = token.cancelled() => {
                                warn!("Task cancelled. Shutting down...");
                            }
                            _ = simulation_loop(simulator, cli.read_only, tx) => ()
                        }
                    }
                    Err(e) => error!("Unable to fetch partition keys: {e:?}"),
                }
            })
        })
        .collect();
    // Timer task: cancels every simulation after cli.duration seconds.
    tokio::spawn(async move {
        info!(
            "Starting timer task. Executing for {} seconds",
            cli.duration
        );
        tokio::time::sleep(Duration::from_secs(cli.duration)).await;
        cancellation_token.cancel();
    });
    for handle in handles {
        match handle.await {
            Ok(_) => info!("Task shut down gracefully"),
            Err(e) => warn!("Task did not shut down gracefully {e:?}"),
        }
    }
    // NOTE(review): main returns as soon as the simulation tasks finish; the
    // publisher thread is never joined, so metrics still queued in the channel
    // may be dropped at process exit — confirm whether that is acceptable.
    Ok(())
}
/// Elasticsearch publisher loop. Runs on its own OS thread with its own Tokio
/// runtime (hence #[tokio::main] on a non-main function — see the
/// std::thread::spawn call in main). Sets an explicit date mapping for the
/// `timestamp` field, then indexes every metrics record received on the
/// channel until all senders are dropped.
#[tokio::main]
async fn start_elasticsearch_publisher(
    mut elasticsearch_rx: Receiver<DynamoDbSimulationMetrics>,
    username: String,
    password: String,
    index: String,
) -> anyhow::Result<()> {
    // NOTE(review): the cluster URL is hard-coded to localhost:9200 while
    // username/password/index are configurable — confirm this is intentional.
    let url = Url::parse("http://localhost:9200")?;
    let connection_pool = SingleNodeConnectionPool::new(url);
    let credentials = Credentials::Basic(username, password);
    let transport = TransportBuilder::new(connection_pool)
        .auth(credentials)
        .build()?;
    let es_client = Elasticsearch::new(transport);
    // Explicit mapping so Elasticsearch treats `timestamp` as a date instead
    // of inferring a type from the first document it sees.
    info!("Setting the explicit mappings for the {index} index");
    es_client
        .indices()
        .put_mapping(IndicesPutMappingParts::Index(&[&index]))
        .body(json!({
            "properties": {
                "timestamp": {
                    "type": "date"
                }
            }
        }))
        .send()
        .await?;
    // Drain the channel until every sender is dropped (recv() yields None).
    // Publish failures are logged and skipped so one bad document does not
    // stop the publisher.
    while let Some(metric) = elasticsearch_rx.recv().await {
        info!("Publishing metrics to Elasticsearch...");
        let es_response = es_client
            .index(elasticsearch::IndexParts::Index(&index))
            .body(metric)
            .send()
            .await;
        match es_response {
            Ok(resp) => {
                if resp.status_code().is_success() {
                    info!("Successfully published metrics to Elasticsearch");
                } else {
                    error!("Was unable to publish metrics to Elasticsearch! Received non 2XX response");
                }
            }
            Err(e) => {
                error!("Unable to publish metrics to Elasticsearch! {e:?}");
            }
        }
    }
    Ok(())
}
async fn simulation_loop(
mut simulator: Simulator<'_>,
read_only: bool,
tx: Sender<DynamoDbSimulationMetrics>,
) {
let mut rng = StdRng::from_seed(OsRng.gen());
loop {
let mut metrics = DynamoDbSimulationMetrics::default();
metrics.timestamp = Utc::now();
let simulation_time = time!(match {
if read_only {
info!("Running a read-only simulation...");
metrics.scenario = Scenario::ReadOnly;
run_read_only_simulation(&mut simulator, &mut metrics, &mut rng).await
} else {
info!("Running a CRUD simulation...");
metrics.scenario = Scenario::Crud;
run_crud_simulation(&mut simulator, &mut metrics, &mut rng).await
}
} {
Ok(_) => {
info!("Simulation completed successfully!");
metrics.successful = true;
}
Err(e) => error!("Simulation did not complete. Encountered the following error: {e:?}"),
});
metrics.simulation_time = Some(simulation_time);
info!("Metrics: {metrics:?}");
match tx.send(metrics).await {
Ok(_) => info!("Metrics sent down channel successfully"),
Err(e) => error!("Metrics were unable to be sent down the channel! {e:?}"),
}
}
}
/// Runs one read-only pass: sleeps a random 0-14 second jitter so the tasks
/// don't hammer DynamoDB in lock-step, tags the metrics as a Read, and
/// performs the read.
async fn run_read_only_simulation(
    simulator: &mut Simulator<'_>,
    metrics: &mut DynamoDbSimulationMetrics,
    rng: &mut StdRng,
) -> anyhow::Result<()> {
    let jitter = Duration::from_secs(rng.gen_range(0..15));
    tokio::time::sleep(jitter).await;
    metrics.operation = DynamoOperation::Read;
    simulator.simulate_read_operation(metrics).await
}
/// Runs one CRUD pass: draws a random operation (read / write / update),
/// records it on the metrics, and delegates to the matching simulator method.
async fn run_crud_simulation(
    simulator: &mut Simulator<'_>,
    metrics: &mut DynamoDbSimulationMetrics,
    rng: &mut StdRng,
) -> anyhow::Result<()> {
    match DynamoOperation::from(rng.gen_range(0..3)) {
        DynamoOperation::Read => {
            metrics.operation = DynamoOperation::Read;
            simulator.simulate_read_operation(metrics).await
        }
        DynamoOperation::Write => {
            metrics.operation = DynamoOperation::Write;
            simulator.simulate_write_operation(metrics).await
        }
        DynamoOperation::Update => {
            metrics.operation = DynamoOperation::Update;
            simulator.simulate_update_operation(metrics).await
        }
    }
}
/// Scans the table (a single page, up to 10 000 items) and returns the "id"
/// partition key of every item, forming the pool of keys for random reads.
// NOTE(review): Scan pages at 1 MB regardless of `limit`, so wide tables may
// yield fewer keys than expected — confirm a single page is sufficient here.
async fn scan_all_partition_keys(
    dynamodb_client: &Client,
    table_name: String,
) -> anyhow::Result<Vec<String>> {
    info!("Fetching a large list of partition keys to randomly read...");
    let response = dynamodb_client
        .scan()
        .table_name(table_name)
        .limit(10000)
        .projection_expression("id")
        .send()
        .await;
    match response {
        Ok(resp) => {
            info!("Fetched partition keys!");
            // Look the "id" attribute up by name instead of taking the last
            // value of the (unordered) attribute map, and skip malformed
            // items instead of panicking on them.
            let partition_keys = resp
                .items()
                .unwrap_or_default()
                .iter()
                .filter_map(|item| item.get("id"))
                .filter_map(|value| value.as_s().ok())
                .cloned()
                .collect::<Vec<String>>();
            info!("Found a total of {} keys", partition_keys.len());
            Ok(partition_keys)
        }
        Err(e) => {
            error!("Unable to fetch partition keys! {e:?}");
            Err(anyhow!(e))
        }
    }
}
/// Builds the log4rs configuration: a single stdout console appender using a
/// UTC-timestamped pattern, with the root logger at Info level.
fn init_logging_config() -> log4rs::Config {
    let stdout = ConsoleAppender::builder()
        .encoder(Box::new(PatternEncoder::new(
            "{d(%Y-%m-%d %H:%M:%S%.3f)(utc)} <{i}> [{l}] {f}:{L} - {m}{n}",
        )))
        .build();
    log4rs::Config::builder()
        .appender(Appender::builder().build("stdout", Box::new(stdout)))
        .build(Root::builder().appender("stdout").build(LevelFilter::Info))
        // A failure here would be a programming error in the static config
        // above, so state the invariant instead of a bare unwrap.
        .expect("static log4rs configuration should always be valid")
}
+102
View File
@@ -0,0 +1,102 @@
use std::collections::HashMap;
use aws_sdk_dynamodb::types::AttributeValue;
use chrono::{DateTime, Utc};
use rand::Rng;
use serde::Serialize;
use serde_json::Number;
use uuid::Uuid;
/// The DynamoDB operation a single simulation pass exercised.
/// Serialized in camelCase into the Elasticsearch metrics documents.
#[derive(Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub enum DynamoOperation {
    /// GetItem; also the default/fallback variant.
    #[default]
    Read,
    /// PutItem of a brand-new item.
    Write,
    /// PutItem overwriting an existing item's attributes.
    Update,
}
impl From<i32> for DynamoOperation {
    /// Maps the random draw 1/2 onto Write/Update; 0 and every out-of-range
    /// value fall back to Read, exactly as the original explicit arms did.
    fn from(value: i32) -> Self {
        match value {
            1 => DynamoOperation::Write,
            2 => DynamoOperation::Update,
            _ => DynamoOperation::Read,
        }
    }
}
/// Which benchmark scenario produced a metrics record.
/// Serialized in camelCase into the Elasticsearch metrics documents.
#[derive(Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub enum Scenario {
    /// Mixed read/write/update traffic (the default).
    #[default]
    Crud,
    /// Reads only (selected via the --read-only CLI flag).
    ReadOnly,
}
/// A randomly generated DynamoDB item used as the payload for benchmark
/// write and update operations; wraps the raw attribute map.
#[derive(Debug)]
pub struct BenchmarkingItem(HashMap<String, AttributeValue>);

impl From<HashMap<String, AttributeValue>> for BenchmarkingItem {
    fn from(value: HashMap<String, AttributeValue>) -> BenchmarkingItem {
        BenchmarkingItem(value)
    }
}

impl BenchmarkingItem {
    /// Builds an item with a random UUID partition key under "id" plus
    /// `attributes` extra attributes keyed "0".."attributes-1": even indices
    /// hold lorem-ipsum strings, odd indices hold random numbers in 0..=32.
    pub fn new(attributes: u32) -> BenchmarkingItem {
        let mut benchmarking_item = HashMap::<String, AttributeValue>::new();
        let mut rng = rand::thread_rng();
        benchmarking_item.insert(
            "id".to_owned(),
            AttributeValue::S(Uuid::new_v4().to_string()),
        );
        (0..attributes).for_each(|i| {
            // Plain equality check instead of the unidiomatic `if let 0 = i % 2`.
            if i % 2 == 0 {
                benchmarking_item.insert(i.to_string(), AttributeValue::S(lipsum::lipsum_words(15)));
            } else {
                benchmarking_item.insert(
                    i.to_string(),
                    AttributeValue::N(rng.gen_range(0.0..=32.0).to_string()),
                );
            }
        });
        BenchmarkingItem(benchmarking_item)
    }

    /// Returns a clone of the partition key attribute.
    ///
    /// Panics if the map has no "id" entry: `new` always inserts one, but an
    /// item built via `From<HashMap<..>>` might not carry it.
    pub fn get_id(&self) -> AttributeValue {
        self.0
            .get("id")
            .cloned()
            .expect("benchmarking item must contain an \"id\" attribute")
    }

    /// Inserts or overwrites an attribute, returning the previous value if any.
    pub fn insert(&mut self, key: &str, val: AttributeValue) -> Option<AttributeValue> {
        self.0.insert(key.to_owned(), val)
    }

    /// Keyed lookup of a single attribute.
    pub(crate) fn get(&self, key: &str) -> Option<&AttributeValue> {
        self.0.get(key)
    }

    /// Clones the full attribute map (the DynamoDB SDK builders take owned maps).
    pub fn extract_map(&self) -> HashMap<String, AttributeValue> {
        self.0.clone()
    }
}
/// Timing data for one simulation pass, published to Elasticsearch as a
/// single camelCase document. All durations are in milliseconds (produced by
/// the `time!` macro); a field is `None` when the corresponding step did not
/// run or when its confirmation attempts were exhausted.
#[derive(Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub struct DynamoDbSimulationMetrics {
    /// The DynamoDB operation this pass performed.
    pub operation: DynamoOperation,
    /// Start time of the pass; mapped as a `date` field in Elasticsearch.
    pub timestamp: DateTime<Utc>,
    /// True when the simulation finished without an error.
    pub successful: bool,
    /// CRUD or read-only scenario.
    pub scenario: Scenario,
    /// Wall-clock time of the whole pass.
    pub simulation_time: Option<Number>,
    /// GetItem latency (recorded only for the primary read, not confirmation polls).
    pub read_time: Option<Number>,
    /// PutItem latency for the initial write.
    pub write_time: Option<Number>,
    /// Time until the newly written item became readable.
    pub write_item_confirmation_time: Option<Number>,
    /// PutItem latency for the overwriting update.
    pub update_time: Option<Number>,
    /// Time until the updated value became visible.
    pub update_item_confirmation_time: Option<Number>,
    /// DeleteItem latency.
    pub delete_time: Option<Number>,
    /// Time until the deleted item stopped being readable.
    pub delete_item_confirmation_time: Option<Number>,
}
+72
View File
@@ -0,0 +1,72 @@
use aws_sdk_dynamodb::types::AttributeValue;
use log::{error, info};
use crate::{models::DynamoDbSimulationMetrics, time};
use super::{utils, Simulator};
impl<'a> Simulator<'a> {
    /// Polls GetItem until the freshly written item becomes readable and
    /// records how long confirmation took in
    /// `metrics.write_item_confirmation_time`. The metric is skipped when
    /// every attempt fails so a timeout is never recorded as a latency.
    // NOTE(review): retries run back-to-back with no delay between attempts —
    // confirm whether a small backoff between polls is desirable.
    pub(super) async fn assert_item_was_created(
        &mut self,
        id: AttributeValue,
        metrics: &mut DynamoDbSimulationMetrics,
    ) -> anyhow::Result<()> {
        // Named constant instead of the magic 10 / i == 9 pair.
        const MAX_ATTEMPTS: u32 = 10;
        let partition_key = utils::extract_partition_key(id.clone());
        let mut attempts_exhausted = false;
        let write_confirmation_time = time!(for i in 0..MAX_ATTEMPTS {
            info!("Attempt {i}: Fetching newly added item with partition key: {partition_key}");
            match self.read_item(id.clone(), metrics, false).await? {
                Some(_) => {
                    info!("Successfully read new item with partition key: {partition_key}");
                    break;
                }
                None => {
                    error!("Unable to find new item with partition key: {partition_key}");
                    if i == MAX_ATTEMPTS - 1 {
                        error!("All attempts to fetch the newly added item with partition key: {partition_key} failed!");
                        attempts_exhausted = true;
                    }
                }
            };
        });
        if !attempts_exhausted {
            metrics.write_item_confirmation_time = Some(write_confirmation_time);
        }
        Ok(())
    }

    /// Polls GetItem until the deleted item stops being readable and records
    /// the elapsed time in `metrics.delete_item_confirmation_time` (skipped
    /// if the item was still present after every attempt).
    pub(super) async fn assert_item_was_deleted(
        &mut self,
        id: AttributeValue,
        metrics: &mut DynamoDbSimulationMetrics,
    ) -> anyhow::Result<()> {
        const MAX_ATTEMPTS: u32 = 10;
        let partition_key = utils::extract_partition_key(id.clone());
        let mut attempts_exhausted = false;
        let delete_confirmation_time = time!(for i in 0..MAX_ATTEMPTS {
            info!("Attempt {i}: Fetching deleted item with partition key: {partition_key}...");
            match self.read_item(id.clone(), metrics, false).await? {
                Some(_) => {
                    error!("Item with partition key {partition_key} was not deleted as expected!");
                    if i == MAX_ATTEMPTS - 1 {
                        error!("All attempts to receive an empty response to verify item with partition key: {partition_key} was deleted failed!");
                        attempts_exhausted = true;
                    }
                }
                None => {
                    info!("Item with partition key {partition_key} was successfully deleted.");
                    break;
                }
            }
        });
        if !attempts_exhausted {
            metrics.delete_item_confirmation_time = Some(delete_confirmation_time);
        }
        Ok(())
    }
}
+140
View File
@@ -0,0 +1,140 @@
use aws_sdk_dynamodb::{types::AttributeValue, Client};
use log::{error, info};
use rand::{
rngs::{OsRng, StdRng},
Rng, SeedableRng,
};
use crate::{models::DynamoDbSimulationMetrics, time};
mod assertions;
mod operations;
mod utils;
/// Drives read/write/update simulations against one DynamoDB table. Borrows
/// the task's client and its pre-scanned partition-key pool for the lifetime
/// of the benchmark task.
pub struct Simulator<'a> {
    /// DynamoDB client owned by the spawning task.
    dynamodb_client: &'a Client,
    /// Target table for every operation.
    table_name: String,
    /// Number of extra attributes generated per benchmarking item.
    attributes: u32,
    /// Pool of existing partition keys that random reads pick from.
    partition_keys_vec: &'a [String],
    /// OS-seeded RNG for choosing keys.
    rng: StdRng,
}
impl<'a> Simulator<'a> {
pub fn new(
dynamodb_client: &'a Client,
table_name: String,
attributes: u32,
partition_keys_vec: &'a [String],
) -> Simulator<'a> {
Simulator {
dynamodb_client,
table_name,
attributes,
partition_keys_vec,
rng: StdRng::from_seed(OsRng.gen()),
}
}
pub async fn simulate_read_operation(
&mut self,
metrics: &mut DynamoDbSimulationMetrics,
) -> anyhow::Result<()> {
info!("Performing READ Operation...");
let partition_key =
self.partition_keys_vec[self.rng.gen_range(0..self.partition_keys_vec.len())].clone();
let id = AttributeValue::S(partition_key.clone());
for i in 0..10 {
info!("Attempt {i}: Fetching existing item with partition key: {partition_key}");
match self.read_item(id.clone(), metrics, true).await? {
Some(_) => {
info!("Successfully read existing item with partition key: {partition_key}");
break;
}
None => {
error!("Unable to find existing item with partition key: {partition_key}");
if i == 9 {
error!(
"All attempts to fetch the existing item with partition key: {partition_key} failed!"
);
}
}
}
}
Ok(())
}
pub async fn simulate_write_operation(
&mut self,
metrics: &mut DynamoDbSimulationMetrics,
) -> anyhow::Result<()> {
info!("Performing WRITE operation...");
let benchmarking_item = self.put_item(metrics).await?;
let id = benchmarking_item.get_id();
self.assert_item_was_created(id.clone(), metrics).await?;
self.delete_item(id.clone(), metrics).await?;
self.assert_item_was_deleted(id, metrics).await?;
Ok(())
}
pub async fn simulate_update_operation(
&mut self,
metrics: &mut DynamoDbSimulationMetrics,
) -> anyhow::Result<()> {
info!("Performing UPDATE operation...");
let new_item = self.put_item(metrics).await?;
let id = new_item.get_id();
let partition_key = utils::extract_partition_key(id.clone());
let mut attempts_exhausted = false;
self.assert_item_was_created(id.clone(), metrics).await?;
self.update_item(id.clone(), metrics).await?;
let update_confirmation_time = time!(for i in 0..10 {
info!("Attempt {i}: Fetching updated item for partition key: {partition_key}...");
let updated_item = self.read_item(id.clone(), metrics, false).await?.unwrap();
let new_item_attribute_value = new_item
.get("1")
.cloned()
.unwrap()
.as_n()
.unwrap()
.to_string();
let updated_item_attribute_value = updated_item
.get("1")
.cloned()
.unwrap()
.as_n()
.unwrap()
.to_string();
if new_item_attribute_value != updated_item_attribute_value {
info!("Confirmed update for partition key: {partition_key}");
break;
} else {
error!("Update for partition key {partition_key} failed! Values are still equal!");
if i == 9 {
error!("Exhausted attempts to fetch updated item!");
attempts_exhausted = true;
}
}
});
if !attempts_exhausted {
metrics.update_item_confirmation_time = Some(update_confirmation_time);
}
self.delete_item(id.clone(), metrics).await?;
self.assert_item_was_deleted(id, metrics).await?;
Ok(())
}
}
+144
View File
@@ -0,0 +1,144 @@
use anyhow::anyhow;
use aws_sdk_dynamodb::types::AttributeValue;
use log::{error, info};
use crate::{
models::{BenchmarkingItem, DynamoDbSimulationMetrics},
time,
};
use super::{utils::extract_partition_key, Simulator};
impl<'a> Simulator<'a> {
pub async fn read_item(
&mut self,
id: AttributeValue,
metrics: &mut DynamoDbSimulationMetrics,
record_metrics: bool,
) -> anyhow::Result<Option<BenchmarkingItem>> {
let partition_key = extract_partition_key(id.clone());
let (read_time, response) = time!(
resp,
self
.dynamodb_client
.get_item()
.table_name(self.table_name.clone())
.key("id", id)
.send()
.await
);
if record_metrics {
metrics.read_time = Some(read_time);
}
match response {
Ok(resp) => {
info!("Found item: {}", partition_key);
if let Some(item) = resp.item() {
info!("Fetched item: {item:?}");
Ok(Some(BenchmarkingItem::from(item.clone())))
} else {
info!("No items found with partition key: {partition_key}");
Ok(None)
}
}
Err(e) => {
error!("Could not fetch item with partition key: {partition_key}. {e:?}");
Err(anyhow!(e))
}
}
}
pub async fn update_item(
&mut self,
id: AttributeValue,
metrics: &mut DynamoDbSimulationMetrics,
) -> anyhow::Result<()> {
let mut updated_item = BenchmarkingItem::new(self.attributes);
updated_item.insert("id", id.clone());
let partition_key = extract_partition_key(id);
let (update_time, response) = time!(
resp,
self
.dynamodb_client
.put_item()
.table_name(self.table_name.clone())
.set_item(Some(updated_item.extract_map()))
.send()
.await
);
metrics.update_time = Some(update_time);
match response {
Ok(_) => {
info!("Successfully updated item with partition_key: {partition_key}");
Ok(())
}
Err(e) => {
error!("Could not update item with partition key: {partition_key}. {e:?}");
Err(anyhow!(e))
}
}
}
pub async fn put_item(
&mut self,
metrics: &mut DynamoDbSimulationMetrics,
) -> anyhow::Result<BenchmarkingItem> {
let new_item = BenchmarkingItem::new(self.attributes);
let partition_key = extract_partition_key(new_item.get("id").cloned().unwrap());
let (time, response) = time!(
resp,
self
.dynamodb_client
.put_item()
.table_name(self.table_name.clone())
.set_item(Some(new_item.extract_map()))
.send()
.await
);
metrics.write_time = Some(time);
match response {
Ok(_) => {
info!("Successfully put new item with partition key: {partition_key}");
Ok(new_item)
}
Err(e) => {
error!("Could not put new item with partition key: {partition_key}. {e:?}");
Err(anyhow!(e))
}
}
}
pub async fn delete_item(
&mut self,
id: AttributeValue,
metrics: &mut DynamoDbSimulationMetrics,
) -> anyhow::Result<()> {
let partition_key = extract_partition_key(id.clone());
let (delete_time, response) = time!(
resp,
self
.dynamodb_client
.delete_item()
.table_name(self.table_name.clone())
.key("id", id)
.send()
.await
);
metrics.delete_time = Some(delete_time);
match response {
Ok(_) => {
info!("Successfully deleted item with partition key: {partition_key}");
Ok(())
}
Err(e) => {
error!("Could not delete item with partition key: {partition_key}. {e:?}");
Err(anyhow!(e))
}
}
}
}
+5
View File
@@ -0,0 +1,5 @@
use aws_sdk_dynamodb::types::AttributeValue;
/// Extracts the string payload of a partition-key attribute.
///
/// Panics (with the stated invariant) when the attribute is not a string —
/// every "id" key in this crate is built as `AttributeValue::S`.
pub(super) fn extract_partition_key(id: AttributeValue) -> String {
    // `id` is already owned, so the previous `.clone()` was redundant.
    id.as_s()
        .expect("partition key must be a string attribute")
        .to_string()
}
+14
View File
@@ -0,0 +1,14 @@
/// Times an expression with `std::time::Instant`, yielding the elapsed
/// wall-clock time in milliseconds as a `serde_json::Number`.
///
/// - `time!(expr)` runs `expr`, discards its result, and returns the elapsed time.
/// - `time!(name, expr)` runs `expr`, binds its result to `name`, and returns
///   the tuple `(elapsed, result)`.
///
/// NOTE: `as_millis()` yields a `u128`; `Number::from(u128)` relies on
/// serde_json's `arbitrary_precision` feature, which this crate enables.
#[macro_export]
macro_rules! time {
    ($x:expr) => {{
        let start = std::time::Instant::now();
        // Result intentionally discarded; only the duration matters here.
        let _result = $x;
        serde_json::Number::from(start.elapsed().as_millis())
    }};
    ($resp:ident, $x:expr) => {{
        let start = std::time::Instant::now();
        let $resp = $x;
        (serde_json::Number::from(start.elapsed().as_millis()), $resp)
    }};
}