This repository was archived by the owner on Feb 15, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFINAL-CODE.cpp
More file actions
133 lines (104 loc) · 5.43 KB
/
FINAL-CODE.cpp
File metadata and controls
133 lines (104 loc) · 5.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#include <iostream>
#include <cmath>
#include <cuda_runtime.h>
#include <gmp.h>
#include <gmpxx.h>
#include <chrono>
#include <vector>
// Konstanten für die Chudnovsky-Methode
__constant__ double A = 13591409.0;
__constant__ double B = 545140134.0;
__constant__ double C = 640320.0;
__constant__ double C3_24 = (C * C * C) / 24.0;
// CUDA-Kernel für die Berechnung der Chudnovsky-Terme
__global__ void compute_chudnovsky_terms(double *numerators, double *denominators, int start, int n) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < n) {
int k = start + idx;
numerators[idx] = tgamma(6 * k + 1.0) / (tgamma(3 * k + 1.0) * pow(tgamma(k + 1.0), 3));
denominators[idx] = pow(-C3_24, k) * (A + B * k);
}
}
// CUDA-Kernel für die Berechnung der Riemann-Zeta-Terme
__global__ void compute_zeta(double *results, int terms, double s) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < terms) {
double term = 1.0 / pow(idx + 1, s);
atomicAdd(results, term);
}
}
// Funktion zur Fehlerprüfung
inline void check_cuda_error(cudaError_t err, const char* msg) {
if (err != cudaSuccess) {
std::cerr << "CUDA Error (" << msg << "): " << cudaGetErrorString(err) << std::endl;
exit(EXIT_FAILURE);
}
}
// Karatsuba-Multiplikation für große Zahlen
mpz_class karatsuba_multiply(const mpz_class &a, const mpz_class &b) {
if (a < 10 || b < 10) return a * b;
size_t n = std::max(a.get_str().size(), b.get_str().size());
size_t m = n / 2;
mpz_class high1 = a / mpz_class(pow(10, m));
mpz_class low1 = a % mpz_class(pow(10, m));
mpz_class high2 = b / mpz_class(pow(10, m));
mpz_class low2 = b % mpz_class(pow(10, m));
mpz_class z0 = karatsuba_multiply(low1, low2);
mpz_class z1 = karatsuba_multiply(low1 + high1, low2 + high2);
mpz_class z2 = karatsuba_multiply(high1, high2);
return z2 * mpz_class(pow(10, 2 * m)) + (z1 - z2 - z0) * mpz_class(pow(10, m)) + z0;
}
// Host-Funktion zur Berechnung von Pi mit GMP, CUDA und Zeta
void calculate_pi_combined(int terms_count, int threads_per_block, double zeta_s) {
// Speicher für Chudnovsky-Terme
double *device_numerators, *device_denominators;
size_t size = terms_count * sizeof(double);
check_cuda_error(cudaMalloc(&device_numerators, size), "cudaMalloc (numerators)");
check_cuda_error(cudaMalloc(&device_denominators, size), "cudaMalloc (denominators)");
// CUDA-Konfiguration
int blocks = (terms_count + threads_per_block - 1) / threads_per_block;
auto start_time = std::chrono::high_resolution_clock::now();
// CUDA-Kernel für Chudnovsky-Terme ausführen
compute_chudnovsky_terms<<<blocks, threads_per_block>>>(device_numerators, device_denominators, 0, terms_count);
check_cuda_error(cudaGetLastError(), "Kernel-Ausführung (Chudnovsky)");
// Ergebnisse zurück zum Host kopieren
std::vector<double> host_numerators(terms_count);
std::vector<double> host_denominators(terms_count);
check_cuda_error(cudaMemcpy(host_numerators.data(), device_numerators, size, cudaMemcpyDeviceToHost), "cudaMemcpy (numerators)");
check_cuda_error(cudaMemcpy(host_denominators.data(), device_denominators, size, cudaMemcpyDeviceToHost), "cudaMemcpy (denominators)");
// CUDA-Kernel für Riemann-Zeta-Terme ausführen
double *device_zeta_result;
check_cuda_error(cudaMalloc(&device_zeta_result, sizeof(double)), "cudaMalloc (zeta result)");
check_cuda_error(cudaMemset(device_zeta_result, 0, sizeof(double)), "cudaMemset (zeta result)");
compute_zeta<<<blocks, threads_per_block>>>(device_zeta_result, terms_count, zeta_s);
check_cuda_error(cudaGetLastError(), "Kernel-Ausführung (Zeta)");
double zeta_result;
check_cuda_error(cudaMemcpy(&zeta_result, device_zeta_result, sizeof(double), cudaMemcpyDeviceToHost), "cudaMemcpy (zeta result)");
auto end_time = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = end_time - start_time;
std::cout << "CUDA-Kernel Laufzeit: " << elapsed.count() << " Sekunden" << std::endl;
// GMP-Berechnung und Validierung
mpf_set_default_prec(1000000); // Hohe Präzision setzen
mpf_class sum_numerator = 0;
mpf_class sum_denominator = 0;
for (int i = 0; i < terms_count; i++) {
sum_numerator += host_numerators[i];
sum_denominator += host_denominators[i];
}
mpf_class pi_chudnovsky = (426880 * sqrt(mpf_class(10005))) / (sum_numerator / sum_denominator);
std::cout << "Berechnetes Pi (Chudnovsky): " << pi_chudnovsky << std::endl;
std::cout << "Berechnetes Zeta-Wert: " << zeta_result << std::endl;
// Speicher freigeben
check_cuda_error(cudaFree(device_numerators), "cudaFree (numerators)");
check_cuda_error(cudaFree(device_denominators), "cudaFree (denominators)");
check_cuda_error(cudaFree(device_zeta_result), "cudaFree (zeta result)");
}
int main() {
int terms_count = 100000; // Anzahl der Terme
int threads_per_block = 256; // Standardanzahl von Threads pro Block
double zeta_s = 2.0; // Parameter für Zeta-Funktion
std::cout << "Weltrekordversuch zur Berechnung von Pi mit CUDA, GMP und Zeta" << std::endl;
std::cout << "Anzahl der Terme: " << terms_count << std::endl;
calculate_pi_combined(terms_count, threads_per_block, zeta_s);
return 0;
}