pub fn gpu_triton_join<T>(
data: &mut JoinData<T>,
hashing_scheme: HashingScheme,
histogram_algorithm_fst: DeviceType<CpuHistogramAlgorithm, GpuHistogramAlgorithm>,
histogram_algorithm_snd: DeviceType<CpuHistogramAlgorithm, GpuHistogramAlgorithm>,
partition_algorithm_fst: DeviceType<CpuRadixPartitionAlgorithm, GpuRadixPartitionAlgorithm>,
partition_algorithm_snd: DeviceType<CpuRadixPartitionAlgorithm, GpuRadixPartitionAlgorithm>,
radix_bits: &RadixBits,
dmem_buffer_bytes: usize,
max_partitions_cache_bytes: Option<usize>,
threads: usize,
cpu_affinity: CpuAffinity,
partitions_mem_type: MemType,
stream_state_mem_type: MemType,
page_type: PageType,
partition_dim: (&GridSize, &BlockSize),
join_dim: (&GridSize, &BlockSize)
) -> Result<(i64, RadixJoinPoint)> where
T: Default + Clone + DeviceCopy + Sync + Send + CpuRadixPartitionable + GpuRadixPartitionable + KeyAttribute + CudaHashJoinable + CpuHashJoinable + CudaRadixJoinable,