Completed DynamoDB + DAX Benchmarker with a nice TUI to boot

This commit is contained in:
hamilcarBarca17
2023-08-02 18:11:41 -06:00
parent 09862c1b43
commit e42070eefa
55 changed files with 3574 additions and 1 deletions
+315
View File
@@ -0,0 +1,315 @@
use std::{env, time::Duration};
use anyhow::anyhow;
use aws_sdk_dynamodb::Client;
use chrono::Utc;
use clap::Parser;
use elasticsearch::{
auth::Credentials,
http::{
transport::{SingleNodeConnectionPool, TransportBuilder},
Url,
},
indices::IndicesPutMappingParts,
Elasticsearch,
};
use log::{error, info, warn, LevelFilter};
use log4rs::{
append::console::ConsoleAppender,
config::{Appender, Root},
encode::pattern::PatternEncoder,
};
use models::{DynamoDbSimulationMetrics, DynamoOperation};
use rand::{
rngs::{OsRng, StdRng},
Rng, SeedableRng,
};
use serde_json::json;
use tokio::{
select,
sync::mpsc::{self, Receiver, Sender},
task::JoinHandle,
};
use tokio_util::sync::CancellationToken;
use crate::{models::Scenario, simulators::Simulator};
mod models;
mod simulators;
mod timer_utils;
// Command-line interface. NOTE: the `///` doc comments on each field double
// as the generated `--help` text, so they are user-facing strings.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// The number of concurrent simulations to run
    #[arg(short, long, default_value_t = 1000)]
    concurrent_simulations: u32,
    /// The number of attributes to use when populating and querying the DynamoDB table; minimum value of 1
    // NOTE(review): the stated minimum of 1 is not enforced, and the update
    // scenario compares attribute "1", which only exists when attributes >= 2
    // — consider a clap value_parser range; confirm the intended minimum.
    #[arg(short, long, default_value_t = 5)]
    attributes: u32,
    /// The length of time (in seconds) to run the benchmark for
    #[arg(short, long, default_value_t = 1800)]
    duration: u64,
    /// The buffer size of the Elasticsearch thread's MPSC channel
    #[arg(short, long, default_value_t = 500)]
    buffer: usize,
    /// Local Elasticsearch cluster username
    #[arg(short, long, default_value_t = String::from("elastic"))]
    username: String,
    /// Local Elasticsearch cluster password
    #[arg(short, long, default_value_t = String::from("changeme"))]
    password: String,
    /// The Elasticsearch Index to insert data into
    #[arg(short, long, default_value_t = String::from("dynamodb"))]
    index: String,
    /// The DynamoDB table to perform operations against
    // Fall back to "unknown" instead of panicking when $USER is unset
    // (e.g. minimal containers or non-POSIX environments).
    #[arg(short, long, default_value_t = format!("{}-high-velocity-table", env::var("USER").unwrap_or_else(|_| String::from("unknown"))))]
    table_name: String,
    /// Whether to run a read-only scenario for benchmarking
    #[arg(short, long)]
    read_only: bool,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();
    log4rs::init_config(init_logging_config())?;
    // Cooperative shutdown signal shared by every simulation task.
    let cancellation_token = CancellationToken::new();
    // Simulation tasks push their metrics through this bounded channel; a
    // dedicated thread drains it and publishes each metric to Elasticsearch.
    let (es_tx, es_rx) = mpsc::channel::<DynamoDbSimulationMetrics>(cli.buffer);
    // Spawned on a plain OS thread (not a tokio task) because
    // start_elasticsearch_publisher is itself annotated #[tokio::main] and
    // therefore starts its own runtime.
    std::thread::spawn(move || {
        start_elasticsearch_publisher(es_rx, cli.username, cli.password, cli.index)
    });
    // One tokio task per concurrent simulation; each builds its own AWS
    // client from the environment and runs until cancelled.
    let handles: Vec<JoinHandle<_>> = (0..cli.concurrent_simulations)
        .map(|_| {
            let tx = es_tx.clone();
            let token = cancellation_token.clone();
            let table_name = cli.table_name.clone();
            tokio::spawn(async move {
                let config = aws_config::load_from_env().await;
                let dynamodb_client = Client::new(&config);
                match scan_all_partition_keys(&dynamodb_client, table_name.clone()).await {
                    Ok(partition_keys_vec) => {
                        let simulator = Simulator::new(
                            &dynamodb_client,
                            table_name.clone(),
                            cli.attributes,
                            &partition_keys_vec,
                        );
                        // Run the (infinite) simulation loop until the timer
                        // task below cancels the token.
                        select! {
                            _ = token.cancelled() => {
                                warn!("Task cancelled. Shutting down...");
                            }
                            _ = simulation_loop(simulator, cli.read_only, tx) => ()
                        }
                    }
                    Err(e) => error!("Unable to fetch partition keys: {e:?}"),
                }
            })
        })
        .collect();
    // Timer task: after `duration` seconds, signal every simulation to stop.
    tokio::spawn(async move {
        info!(
            "Starting timer task. Executing for {} seconds",
            cli.duration
        );
        tokio::time::sleep(Duration::from_secs(cli.duration)).await;
        cancellation_token.cancel();
    });
    for handle in handles {
        match handle.await {
            Ok(_) => info!("Task shut down gracefully"),
            Err(e) => warn!("Task did not shut down gracefully {e:?}"),
        }
    }
    // NOTE(review): the original `es_tx` is never dropped, so the publisher's
    // receiver only closes when the process exits — confirm that discarding
    // any still-buffered metrics at shutdown is acceptable.
    Ok(())
}
/// Drains simulation metrics from the channel and indexes each one into the
/// local Elasticsearch cluster. Runs until every sender is dropped and the
/// channel closes.
///
/// Annotated `#[tokio::main]` so it can be launched on a plain OS thread
/// (see `main`) with its own dedicated async runtime.
#[tokio::main]
async fn start_elasticsearch_publisher(
    mut elasticsearch_rx: Receiver<DynamoDbSimulationMetrics>,
    username: String,
    password: String,
    index: String,
) -> anyhow::Result<()> {
    // Single-node local cluster with basic auth.
    let url = Url::parse("http://localhost:9200")?;
    let connection_pool = SingleNodeConnectionPool::new(url);
    let credentials = Credentials::Basic(username, password);
    let transport = TransportBuilder::new(connection_pool)
        .auth(credentials)
        .build()?;
    let es_client = Elasticsearch::new(transport);
    // Map `timestamp` explicitly as a date so it is treated as a time field
    // rather than inferred as a string.
    info!("Setting the explicit mappings for the {index} index");
    es_client
        .indices()
        .put_mapping(IndicesPutMappingParts::Index(&[&index]))
        .body(json!({
            "properties": {
                "timestamp": {
                    "type": "date"
                }
            }
        }))
        .send()
        .await?;
    // One document per metric; failures are logged but never stop the loop.
    while let Some(metric) = elasticsearch_rx.recv().await {
        info!("Publishing metrics to Elasticsearch...");
        let es_response = es_client
            .index(elasticsearch::IndexParts::Index(&index))
            .body(metric)
            .send()
            .await;
        match es_response {
            Ok(resp) => {
                if resp.status_code().is_success() {
                    info!("Successfully published metrics to Elasticsearch");
                } else {
                    error!("Was unable to publish metrics to Elasticsearch! Received non 2XX response");
                }
            }
            Err(e) => {
                error!("Unable to publish metrics to Elasticsearch! {e:?}");
            }
        }
    }
    Ok(())
}
/// Repeatedly runs one simulation iteration (read-only or CRUD), records its
/// wall-clock duration and outcome in a fresh metrics struct, and sends the
/// metrics down the Elasticsearch channel. Loops forever; the caller races it
/// against a cancellation token via `select!` in `main`.
async fn simulation_loop(
    mut simulator: Simulator<'_>,
    read_only: bool,
    tx: Sender<DynamoDbSimulationMetrics>,
) {
    // Per-loop RNG seeded from the OS CSPRNG.
    let mut rng = StdRng::from_seed(OsRng.gen());
    loop {
        let mut metrics = DynamoDbSimulationMetrics::default();
        metrics.timestamp = Utc::now();
        // time! wraps the entire match (the simulation run plus its result
        // handling) and yields the elapsed milliseconds.
        let simulation_time = time!(match {
            if read_only {
                info!("Running a read-only simulation...");
                metrics.scenario = Scenario::ReadOnly;
                run_read_only_simulation(&mut simulator, &mut metrics, &mut rng).await
            } else {
                info!("Running a CRUD simulation...");
                metrics.scenario = Scenario::Crud;
                run_crud_simulation(&mut simulator, &mut metrics, &mut rng).await
            }
        } {
            Ok(_) => {
                info!("Simulation completed successfully!");
                // `successful` keeps its default of false on any error.
                metrics.successful = true;
            }
            Err(e) => error!("Simulation did not complete. Encountered the following error: {e:?}"),
        });
        metrics.simulation_time = Some(simulation_time);
        info!("Metrics: {metrics:?}");
        match tx.send(metrics).await {
            Ok(_) => info!("Metrics sent down channel successfully"),
            Err(e) => error!("Metrics were unable to be sent down the channel! {e:?}"),
        }
    }
}
/// One read-only iteration: waits a random 0–14 s jitter, then performs a
/// single simulated read against a known partition key.
async fn run_read_only_simulation(
    simulator: &mut Simulator<'_>,
    metrics: &mut DynamoDbSimulationMetrics,
    rng: &mut StdRng,
) -> anyhow::Result<()> {
    // Stagger the concurrent tasks so reads don't arrive in lock-step.
    let jitter = Duration::from_secs(rng.gen_range(0..15));
    tokio::time::sleep(jitter).await;
    metrics.operation = DynamoOperation::Read;
    simulator.simulate_read_operation(metrics).await
}
/// One CRUD iteration: rolls a random operation (read, write, or update),
/// tags the metrics with it, and delegates to the matching simulator routine.
async fn run_crud_simulation(
    simulator: &mut Simulator<'_>,
    metrics: &mut DynamoDbSimulationMetrics,
    rng: &mut StdRng,
) -> anyhow::Result<()> {
    let roll = rng.gen_range(0..3);
    match DynamoOperation::from(roll) {
        DynamoOperation::Write => {
            metrics.operation = DynamoOperation::Write;
            simulator.simulate_write_operation(metrics).await?;
        }
        DynamoOperation::Update => {
            metrics.operation = DynamoOperation::Update;
            simulator.simulate_update_operation(metrics).await?;
        }
        DynamoOperation::Read => {
            metrics.operation = DynamoOperation::Read;
            simulator.simulate_read_operation(metrics).await?;
        }
    }
    Ok(())
}
/// Scans the table (a single page: at most 10,000 items or 1 MB) and collects
/// the string `id` partition key of every returned item.
///
/// # Errors
/// Returns an error when the Scan request itself fails. An empty table yields
/// an empty vec, and items lacking a string `id` are skipped rather than
/// causing a panic.
async fn scan_all_partition_keys(
    dynamodb_client: &Client,
    table_name: String,
) -> anyhow::Result<Vec<String>> {
    info!("Fetching a large list of partition keys to randomly read...");
    let response = dynamodb_client
        .scan()
        .table_name(table_name)
        .limit(10000)
        // Only the partition key is needed, so don't pull whole items.
        .projection_expression("id")
        .send()
        .await;
    match response {
        Ok(resp) => {
            info!("Fetched partition keys!");
            let partition_keys = resp
                .items()
                // No items at all (empty table) is a valid, empty result —
                // don't unwrap.
                .unwrap_or_default()
                .iter()
                // Look the key up by name instead of relying on arbitrary
                // HashMap iteration order, and skip malformed items instead
                // of unwrapping.
                .filter_map(|item| Some(item.get("id")?.as_s().ok()?.to_string()))
                .collect::<Vec<String>>();
            info!("Found a total of {} keys", partition_keys.len());
            Ok(partition_keys)
        }
        Err(e) => {
            error!("Unable to fetch partition keys! {e:?}");
            Err(anyhow!(e))
        }
    }
}
/// Builds the log4rs configuration: a single console appender at INFO level
/// whose pattern includes a UTC timestamp, process id, level, and file:line.
fn init_logging_config() -> log4rs::Config {
    let encoder = PatternEncoder::new(
        "{d(%Y-%m-%d %H:%M:%S%.3f)(utc)} <{i}> [{l}] {f}:{L} - {m}{n}",
    );
    let console = ConsoleAppender::builder()
        .encoder(Box::new(encoder))
        .build();
    let root = Root::builder().appender("stdout").build(LevelFilter::Info);
    log4rs::Config::builder()
        .appender(Appender::builder().build("stdout", Box::new(console)))
        .build(root)
        .unwrap()
}
+102
View File
@@ -0,0 +1,102 @@
use std::collections::HashMap;
use aws_sdk_dynamodb::types::AttributeValue;
use chrono::{DateTime, Utc};
use rand::Rng;
use serde::Serialize;
use serde_json::Number;
use uuid::Uuid;
/// The DynamoDB operation a single simulation iteration performed.
/// Serialized in camelCase into the metrics documents.
#[derive(Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub enum DynamoOperation {
    #[default]
    Read,
    Write,
    Update,
}
impl From<i32> for DynamoOperation {
    /// Maps 0 → Read, 1 → Write, 2 → Update; any other value falls back to
    /// Read, so a random roll can never fail to convert.
    fn from(value: i32) -> Self {
        if value == 1 {
            DynamoOperation::Write
        } else if value == 2 {
            DynamoOperation::Update
        } else {
            DynamoOperation::Read
        }
    }
}
/// Which benchmark scenario produced a metrics document: full CRUD (the
/// default) or read-only. Serialized in camelCase.
#[derive(Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub enum Scenario {
    #[default]
    Crud,
    ReadOnly,
}
/// Newtype wrapper around a raw DynamoDB item (attribute name → value map)
/// used as the payload for write/update benchmarking.
#[derive(Debug)]
pub struct BenchmarkingItem(HashMap<String, AttributeValue>);
impl From<HashMap<String, AttributeValue>> for BenchmarkingItem {
fn from(value: HashMap<String, AttributeValue>) -> BenchmarkingItem {
BenchmarkingItem(value)
}
}
impl BenchmarkingItem {
    /// Creates a fresh item with a random UUID `id` plus `attributes` extra
    /// fields named "0".."attributes-1": even indices hold lorem-ipsum
    /// strings, odd indices hold random numbers in 0.0..=32.0.
    pub fn new(attributes: u32) -> BenchmarkingItem {
        let mut rng = rand::thread_rng();
        let mut fields = HashMap::<String, AttributeValue>::new();
        fields.insert(
            "id".to_owned(),
            AttributeValue::S(Uuid::new_v4().to_string()),
        );
        for i in 0..attributes {
            let value = if i % 2 == 0 {
                AttributeValue::S(lipsum::lipsum_words(15))
            } else {
                AttributeValue::N(rng.gen_range(0.0..=32.0).to_string())
            };
            fields.insert(i.to_string(), value);
        }
        BenchmarkingItem(fields)
    }
    /// Returns a clone of the `id` attribute; `new` always inserts one, so
    /// the unwrap only fires on a broken invariant.
    pub fn get_id(&self) -> AttributeValue {
        self.0.get("id").cloned().unwrap()
    }
    /// Inserts or replaces an attribute, returning any previous value.
    pub fn insert(&mut self, key: &str, val: AttributeValue) -> Option<AttributeValue> {
        self.0.insert(key.to_owned(), val)
    }
    /// Borrows a single attribute by name.
    pub(crate) fn get(&self, key: &str) -> Option<&AttributeValue> {
        self.0.get(key)
    }
    /// Clones the underlying map, e.g. to feed `put_item`'s `set_item`.
    pub fn extract_map(&self) -> HashMap<String, AttributeValue> {
        self.0.clone()
    }
}
/// One Elasticsearch document's worth of measurements for a single simulation
/// iteration. All `*_time` fields come from the `time!` macro and are elapsed
/// milliseconds; `None` means that phase didn't run or wasn't confirmed.
#[derive(Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub struct DynamoDbSimulationMetrics {
    pub operation: DynamoOperation,
    pub timestamp: DateTime<Utc>,
    // False (the default) until the whole iteration completes without error.
    pub successful: bool,
    pub scenario: Scenario,
    // End-to-end duration of the iteration.
    pub simulation_time: Option<Number>,
    pub read_time: Option<Number>,
    pub write_time: Option<Number>,
    // How long until a freshly written item became readable.
    pub write_item_confirmation_time: Option<Number>,
    pub update_time: Option<Number>,
    // How long until an update was observed via read-back.
    pub update_item_confirmation_time: Option<Number>,
    pub delete_time: Option<Number>,
    // How long until a delete was confirmed by an empty read.
    pub delete_item_confirmation_time: Option<Number>,
}
+72
View File
@@ -0,0 +1,72 @@
use aws_sdk_dynamodb::types::AttributeValue;
use log::{error, info};
use crate::{models::DynamoDbSimulationMetrics, time};
use super::{utils, Simulator};
impl<'a> Simulator<'a> {
    /// Maximum read-back attempts when confirming that a write or delete
    /// landed. Named so the loop bound and the "last attempt" check below
    /// cannot drift apart.
    const MAX_CONFIRMATION_ATTEMPTS: u32 = 10;

    /// Polls the table until the item with the given `id` becomes readable,
    /// recording how long confirmation took in
    /// `metrics.write_item_confirmation_time` — only when it succeeded.
    ///
    /// # Errors
    /// Propagates any DynamoDB error from the underlying reads.
    pub(super) async fn assert_item_was_created(
        &mut self,
        id: AttributeValue,
        metrics: &mut DynamoDbSimulationMetrics,
    ) -> anyhow::Result<()> {
        let partition_key = utils::extract_partition_key(id.clone());
        let mut attempts_exhausted = false;
        let write_confirmation_time = time!(for i in 0..Self::MAX_CONFIRMATION_ATTEMPTS {
            info!("Attempt {i}: Fetching newly added item with partition key: {partition_key}");
            match self.read_item(id.clone(), metrics, false).await? {
                Some(_) => {
                    info!("Successfully read new item with partition key: {partition_key}");
                    break;
                }
                None => {
                    error!("Unable to find new item with partition key: {partition_key}");
                    if i + 1 == Self::MAX_CONFIRMATION_ATTEMPTS {
                        error!("All attempts to fetch the newly added item with partition key: {partition_key} failed!");
                        attempts_exhausted = true;
                    }
                }
            };
        });
        if !attempts_exhausted {
            metrics.write_item_confirmation_time = Some(write_confirmation_time);
        }
        Ok(())
    }

    /// Polls the table until a read for `id` comes back empty, recording how
    /// long delete confirmation took in
    /// `metrics.delete_item_confirmation_time` — only when it succeeded.
    ///
    /// # Errors
    /// Propagates any DynamoDB error from the underlying reads.
    pub(super) async fn assert_item_was_deleted(
        &mut self,
        id: AttributeValue,
        metrics: &mut DynamoDbSimulationMetrics,
    ) -> anyhow::Result<()> {
        let partition_key = utils::extract_partition_key(id.clone());
        let mut attempts_exhausted = false;
        let delete_confirmation_time = time!(for i in 0..Self::MAX_CONFIRMATION_ATTEMPTS {
            info!("Attempt {i}: Fetching deleted item with partition key: {partition_key}...");
            match self.read_item(id.clone(), metrics, false).await? {
                Some(_) => {
                    error!("Item with partition key {partition_key} was not deleted as expected!");
                    if i + 1 == Self::MAX_CONFIRMATION_ATTEMPTS {
                        error!("All attempts to receive an empty response to verify item with partition key: {partition_key} was deleted failed!");
                        attempts_exhausted = true;
                    }
                }
                None => {
                    info!("Item with partition key {partition_key} was successfully deleted.");
                    break;
                }
            }
        });
        if !attempts_exhausted {
            metrics.delete_item_confirmation_time = Some(delete_confirmation_time);
        }
        Ok(())
    }
}
+140
View File
@@ -0,0 +1,140 @@
use aws_sdk_dynamodb::{types::AttributeValue, Client};
use log::{error, info};
use rand::{
rngs::{OsRng, StdRng},
Rng, SeedableRng,
};
use crate::{models::DynamoDbSimulationMetrics, time};
mod assertions;
mod operations;
mod utils;
/// Drives simulated DynamoDB traffic (reads, writes, updates) against a
/// single table via a borrowed client.
pub struct Simulator<'a> {
    dynamodb_client: &'a Client,
    table_name: String,
    // Number of extra attributes generated per benchmarking item.
    attributes: u32,
    // Pool of known partition keys used by read simulations.
    partition_keys_vec: &'a [String],
    // Per-simulator RNG, seeded from the OS at construction.
    rng: StdRng,
}
impl<'a> Simulator<'a> {
    /// Builds a simulator bound to one DynamoDB client and table.
    ///
    /// * `attributes` — how many extra attributes each generated item carries.
    /// * `partition_keys_vec` — pool of existing ids used by read simulations.
    pub fn new(
        dynamodb_client: &'a Client,
        table_name: String,
        attributes: u32,
        partition_keys_vec: &'a [String],
    ) -> Simulator<'a> {
        Simulator {
            dynamodb_client,
            table_name,
            attributes,
            partition_keys_vec,
            // Seed from the OS CSPRNG so every simulator gets an independent
            // random stream.
            rng: StdRng::from_seed(OsRng.gen()),
        }
    }

    /// READ scenario: picks a random known partition key and retries the read
    /// up to 10 times until the item is found.
    ///
    /// # Errors
    /// Fails when the key pool is empty or a GetItem request errors.
    pub async fn simulate_read_operation(
        &mut self,
        metrics: &mut DynamoDbSimulationMetrics,
    ) -> anyhow::Result<()> {
        info!("Performing READ Operation...");
        // Guard: indexing into an empty pool below would panic.
        if self.partition_keys_vec.is_empty() {
            return Err(anyhow::anyhow!("no partition keys available to read"));
        }
        let partition_key =
            self.partition_keys_vec[self.rng.gen_range(0..self.partition_keys_vec.len())].clone();
        let id = AttributeValue::S(partition_key.clone());
        for i in 0..10 {
            info!("Attempt {i}: Fetching existing item with partition key: {partition_key}");
            match self.read_item(id.clone(), metrics, true).await? {
                Some(_) => {
                    info!("Successfully read existing item with partition key: {partition_key}");
                    break;
                }
                None => {
                    error!("Unable to find existing item with partition key: {partition_key}");
                    if i == 9 {
                        error!(
                            "All attempts to fetch the existing item with partition key: {partition_key} failed!"
                        );
                    }
                }
            }
        }
        Ok(())
    }

    /// WRITE scenario: put a new item, confirm it is readable, then delete it
    /// and confirm the delete.
    pub async fn simulate_write_operation(
        &mut self,
        metrics: &mut DynamoDbSimulationMetrics,
    ) -> anyhow::Result<()> {
        info!("Performing WRITE operation...");
        let benchmarking_item = self.put_item(metrics).await?;
        let id = benchmarking_item.get_id();
        self.assert_item_was_created(id.clone(), metrics).await?;
        self.delete_item(id.clone(), metrics).await?;
        self.assert_item_was_deleted(id, metrics).await?;
        Ok(())
    }

    /// UPDATE scenario: writes a fresh item, confirms it exists, overwrites
    /// it with new attribute values, confirms the overwrite landed by
    /// comparing attribute "1", then deletes it and confirms the delete.
    ///
    /// NOTE(review): the comparison reads attribute "1", which
    /// `BenchmarkingItem::new` only creates when `attributes >= 2` — with
    /// `--attributes 1` the unwraps below panic; confirm the intended minimum.
    pub async fn simulate_update_operation(
        &mut self,
        metrics: &mut DynamoDbSimulationMetrics,
    ) -> anyhow::Result<()> {
        info!("Performing UPDATE operation...");
        let new_item = self.put_item(metrics).await?;
        let id = new_item.get_id();
        let partition_key = utils::extract_partition_key(id.clone());
        let mut attempts_exhausted = false;
        self.assert_item_was_created(id.clone(), metrics).await?;
        self.update_item(id.clone(), metrics).await?;
        let update_confirmation_time = time!(for i in 0..10 {
            info!("Attempt {i}: Fetching updated item for partition key: {partition_key}...");
            // The read may transiently miss the item; treat a missing item as
            // a failed attempt instead of panicking on None.
            let updated_item = match self.read_item(id.clone(), metrics, false).await? {
                Some(item) => item,
                None => {
                    error!("Updated item with partition key {partition_key} could not be read!");
                    if i == 9 {
                        error!("Exhausted attempts to fetch updated item!");
                        attempts_exhausted = true;
                    }
                    continue;
                }
            };
            let new_item_attribute_value = new_item
                .get("1")
                .cloned()
                .unwrap()
                .as_n()
                .unwrap()
                .to_string();
            let updated_item_attribute_value = updated_item
                .get("1")
                .cloned()
                .unwrap()
                .as_n()
                .unwrap()
                .to_string();
            // update_item rewrote every attribute with freshly generated
            // values, so a differing value for "1" shows the overwrite is
            // visible.
            if new_item_attribute_value != updated_item_attribute_value {
                info!("Confirmed update for partition key: {partition_key}");
                break;
            } else {
                error!("Update for partition key {partition_key} failed! Values are still equal!");
                if i == 9 {
                    error!("Exhausted attempts to fetch updated item!");
                    attempts_exhausted = true;
                }
            }
        });
        if !attempts_exhausted {
            metrics.update_item_confirmation_time = Some(update_confirmation_time);
        }
        self.delete_item(id.clone(), metrics).await?;
        self.assert_item_was_deleted(id, metrics).await?;
        Ok(())
    }
}
+144
View File
@@ -0,0 +1,144 @@
use anyhow::anyhow;
use aws_sdk_dynamodb::types::AttributeValue;
use log::{error, info};
use crate::{
models::{BenchmarkingItem, DynamoDbSimulationMetrics},
time,
};
use super::{utils::extract_partition_key, Simulator};
impl<'a> Simulator<'a> {
pub async fn read_item(
&mut self,
id: AttributeValue,
metrics: &mut DynamoDbSimulationMetrics,
record_metrics: bool,
) -> anyhow::Result<Option<BenchmarkingItem>> {
let partition_key = extract_partition_key(id.clone());
let (read_time, response) = time!(
resp,
self
.dynamodb_client
.get_item()
.table_name(self.table_name.clone())
.key("id", id)
.send()
.await
);
if record_metrics {
metrics.read_time = Some(read_time);
}
match response {
Ok(resp) => {
info!("Found item: {}", partition_key);
if let Some(item) = resp.item() {
info!("Fetched item: {item:?}");
Ok(Some(BenchmarkingItem::from(item.clone())))
} else {
info!("No items found with partition key: {partition_key}");
Ok(None)
}
}
Err(e) => {
error!("Could not fetch item with partition key: {partition_key}. {e:?}");
Err(anyhow!(e))
}
}
}
pub async fn update_item(
&mut self,
id: AttributeValue,
metrics: &mut DynamoDbSimulationMetrics,
) -> anyhow::Result<()> {
let mut updated_item = BenchmarkingItem::new(self.attributes);
updated_item.insert("id", id.clone());
let partition_key = extract_partition_key(id);
let (update_time, response) = time!(
resp,
self
.dynamodb_client
.put_item()
.table_name(self.table_name.clone())
.set_item(Some(updated_item.extract_map()))
.send()
.await
);
metrics.update_time = Some(update_time);
match response {
Ok(_) => {
info!("Successfully updated item with partition_key: {partition_key}");
Ok(())
}
Err(e) => {
error!("Could not update item with partition key: {partition_key}. {e:?}");
Err(anyhow!(e))
}
}
}
pub async fn put_item(
&mut self,
metrics: &mut DynamoDbSimulationMetrics,
) -> anyhow::Result<BenchmarkingItem> {
let new_item = BenchmarkingItem::new(self.attributes);
let partition_key = extract_partition_key(new_item.get("id").cloned().unwrap());
let (time, response) = time!(
resp,
self
.dynamodb_client
.put_item()
.table_name(self.table_name.clone())
.set_item(Some(new_item.extract_map()))
.send()
.await
);
metrics.write_time = Some(time);
match response {
Ok(_) => {
info!("Successfully put new item with partition key: {partition_key}");
Ok(new_item)
}
Err(e) => {
error!("Could not put new item with partition key: {partition_key}. {e:?}");
Err(anyhow!(e))
}
}
}
pub async fn delete_item(
&mut self,
id: AttributeValue,
metrics: &mut DynamoDbSimulationMetrics,
) -> anyhow::Result<()> {
let partition_key = extract_partition_key(id.clone());
let (delete_time, response) = time!(
resp,
self
.dynamodb_client
.delete_item()
.table_name(self.table_name.clone())
.key("id", id)
.send()
.await
);
metrics.delete_time = Some(delete_time);
match response {
Ok(_) => {
info!("Successfully deleted item with partition key: {partition_key}");
Ok(())
}
Err(e) => {
error!("Could not delete item with partition key: {partition_key}. {e:?}");
Err(anyhow!(e))
}
}
}
}
+5
View File
@@ -0,0 +1,5 @@
use aws_sdk_dynamodb::types::AttributeValue;
/// Extracts the string partition key from an `id` attribute value.
///
/// Panics if `id` is not the `S` (string) variant; every id this tool writes
/// is a string UUID.
pub(super) fn extract_partition_key(id: AttributeValue) -> String {
    // `as_s` only borrows, so no defensive clone of `id` is needed.
    id.as_s().unwrap().to_string()
}
+14
View File
@@ -0,0 +1,14 @@
/// Timing helpers built on `std::time::Instant`.
///
/// `time!(expr)` evaluates `expr`, discards its value, and yields the elapsed
/// wall-clock time in milliseconds as a `serde_json::Number`.
///
/// `time!(name, expr)` additionally binds the expression's value to `name`
/// and yields an `(elapsed_millis, value)` tuple, so callers can time a
/// request and still use its response.
#[macro_export]
macro_rules! time {
    ($x:expr) => {{
        let start = std::time::Instant::now();
        let _result = $x;
        serde_json::Number::from(start.elapsed().as_millis())
    }};
    ($resp:ident, $x:expr) => {{
        let start = std::time::Instant::now();
        let $resp = $x;
        (serde_json::Number::from(start.elapsed().as_millis()), $resp)
    }};
}