// Copyright 2019-2022 Clemens Lutz
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! A collection of data set generators for data sets frequently found in
//! published papers.
use super::relation::{KeyAttribute, UniformRelation};
use crate::error::Result;
use num_traits::FromPrimitive;
/// Generator for the Kim data set.
///
/// The Kim data set is taken from the paper Kim et al. "Sort vs. hash revisited:
/// Fast join implementation on modern multi-core CPUs" in PVLDB 2009.
///
/// The paper uses 4-byte keys / 8-byte tuples.
///
/// `Kim` is a stateless marker type: all functionality is exposed through
/// associated functions, so no instance is needed to use it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Kim;
impl Kim {
    /// Rows in the primary key relation.
    ///
    /// Evaluates to `128 * 10^6` rows. Being a `const fn`, the value can
    /// also be used in constant contexts.
    pub const fn primary_key_len() -> usize {
        128 * 1_000_000
    }

    /// Rows in the foreign key relation.
    ///
    /// Evaluates to `128 * 10^6` rows, i.e., the same number of rows as the
    /// primary key relation. Being a `const fn`, the value can also be used
    /// in constant contexts.
    pub const fn foreign_key_len() -> usize {
        128 * 1_000_000
    }

    /// Generate the Kim data set.
    ///
    /// Requires a slice for the primary key attribute, and a slice for the
    /// foreign key attribute. Both slices must have the lengths specified by
    /// the `primary_key_len()` and `foreign_key_len()` functions.
    ///
    /// `selectivity` specifies the join selectivity in percent. It is
    /// forwarded unchanged to the primary key generator, where a
    /// corresponding percentage of keys are set to the `NULL` value. By
    /// default (`None`), the selectivity is 100%.
    ///
    /// # Panics
    ///
    /// Panics if `pk_attr.len()` differs from `Kim::primary_key_len()` or if
    /// `fk_attr.len()` differs from `Kim::foreign_key_len()`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by
    /// `UniformRelation::gen_primary_key_par` or
    /// `UniformRelation::gen_attr_par`.
    pub fn gen<T: Copy + Send + KeyAttribute + FromPrimitive>(
        pk_attr: &mut [T],
        fk_attr: &mut [T],
        selectivity: Option<u32>,
    ) -> Result<()> {
        // `assert_eq!` reports both the actual and the expected length when
        // the precondition is violated.
        assert_eq!(
            pk_attr.len(),
            Self::primary_key_len(),
            "primary key slice length must equal Kim::primary_key_len()"
        );
        assert_eq!(
            fk_attr.len(),
            Self::foreign_key_len(),
            "foreign key slice length must equal Kim::foreign_key_len()"
        );

        UniformRelation::gen_primary_key_par(pk_attr, selectivity)?;
        // The foreign key generator receives the range `0..pk_attr.len()`;
        // presumably this is the value domain the foreign keys are drawn
        // from (confirm against `UniformRelation::gen_attr_par`).
        UniformRelation::gen_attr_par(fk_attr, 0..pk_attr.len())?;

        Ok(())
    }
}
/// Generator for the Blanas data set.
///
/// The Blanas data set is taken from the paper Blanas et al. "Design and
/// evaluation of main memory hash join algorithms for multi-core CPUs" in
/// SIGMOD 2011.
///
/// The paper uses 8-byte keys / 16-byte tuples.
///
/// `Blanas` is a stateless marker type: all functionality is exposed through
/// associated functions, so no instance is needed to use it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Blanas;
impl Blanas {
    /// Rows in the primary key relation.
    ///
    /// Evaluates to `16 * 2^20` rows. Being a `const fn`, the value can also
    /// be used in constant contexts.
    pub const fn primary_key_len() -> usize {
        16 * (1 << 20)
    }

    /// Rows in the foreign key relation.
    ///
    /// Evaluates to `256 * 2^20` rows, i.e., 16 times the number of rows in
    /// the primary key relation. Being a `const fn`, the value can also be
    /// used in constant contexts.
    pub const fn foreign_key_len() -> usize {
        256 * (1 << 20)
    }

    /// Generate the Blanas data set.
    ///
    /// Requires a slice for the primary key attribute, and a slice for the
    /// foreign key attribute. Both slices must have the lengths specified by
    /// the `primary_key_len()` and `foreign_key_len()` functions.
    ///
    /// `selectivity` specifies the join selectivity in percent. It is
    /// forwarded unchanged to the primary key generator, where a
    /// corresponding percentage of keys are set to the `NULL` value. By
    /// default (`None`), the selectivity is 100%.
    ///
    /// # Panics
    ///
    /// Panics if `pk_attr.len()` differs from `Blanas::primary_key_len()` or
    /// if `fk_attr.len()` differs from `Blanas::foreign_key_len()`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by
    /// `UniformRelation::gen_primary_key_par` or
    /// `UniformRelation::gen_attr_par`.
    pub fn gen<T: Copy + Send + KeyAttribute + FromPrimitive>(
        pk_attr: &mut [T],
        fk_attr: &mut [T],
        selectivity: Option<u32>,
    ) -> Result<()> {
        // `assert_eq!` reports both the actual and the expected length when
        // the precondition is violated.
        assert_eq!(
            pk_attr.len(),
            Self::primary_key_len(),
            "primary key slice length must equal Blanas::primary_key_len()"
        );
        assert_eq!(
            fk_attr.len(),
            Self::foreign_key_len(),
            "foreign key slice length must equal Blanas::foreign_key_len()"
        );

        UniformRelation::gen_primary_key_par(pk_attr, selectivity)?;
        // The foreign key generator receives the range `0..pk_attr.len()`;
        // presumably this is the value domain the foreign keys are drawn
        // from (confirm against `UniformRelation::gen_attr_par`).
        UniformRelation::gen_attr_par(fk_attr, 0..pk_attr.len())?;

        Ok(())
    }
}