ShiroOnigami23 committed on
Commit
dc19554
·
verified ·
1 Parent(s): 2dcdebe

Upload rosetta_generator.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. rosetta_generator.py +211 -0
rosetta_generator.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import random
3
+
4
# ============================
# CONFIGURATION
# ============================
# Path of the CSV file written by the generator.
OUTPUT_FILE = "rosetta_code_dataset.csv"
# Number of (prompt, language, code) rows sampled for every algorithm.
SAMPLES_PER_ALGO = 500  # High variation count for better matching

# ============================
# 1. THE ULTIMATE ALGORITHM LIBRARY
# ============================
# Maps an algorithm key to its templates:
#   "prompts" - base natural-language phrasings for the task
#   "python" / "cpp" / "java" - the code snippet emitted for that language
# Snippets are stored as single-line strings with explicit "\n" escapes and
# 4-space indentation so that nested blocks stay syntactically valid
# (this matters most for the Python templates).
ALGORITHMS = {
    # ---------------------------
    # BASIC MATH & LOGIC
    # ---------------------------
    "factorial": {
        "prompts": ["factorial of a number", "calculate n!", "multiplication of 1 to n", "find factorial", "fact code"],
        "python": "def factorial(n):\n    return 1 if n == 0 else n * factorial(n-1)\nnum = int(input())\nprint(factorial(num))",
        "cpp": "#include<iostream>\nusing namespace std;\nint factorial(int n) {\n    return (n == 0) ? 1 : n * factorial(n - 1);\n}\nint main() {\n    int n; cin>>n;\n    cout << factorial(n);\n}",
        "java": "import java.util.Scanner;\nclass Main {\n    static int factorial(int n) {\n        return (n == 0) ? 1 : n * factorial(n - 1);\n    }\n    public static void main(String[] args) {\n        Scanner sc = new Scanner(System.in);\n        System.out.println(factorial(sc.nextInt()));\n    }\n}"
    },
    "fibonacci": {
        "prompts": ["fibonacci series", "print fib numbers", "sequence 0 1 1 2 3", "fib series", "fibonacci recursion"],
        "python": "n = int(input())\na, b = 0, 1\nfor _ in range(n):\n    print(a, end=' ')\n    a, b = b, a+b",
        "cpp": "int n, a=0, b=1, next;\ncin >> n;\nfor (int i = 0; i < n; i++) {\n    cout << a << \" \";\n    next = a + b;\n    a = b;\n    b = next;\n}",
        "java": "int n = 10, a = 0, b = 1;\nfor (int i = 0; i < n; i++) {\n    System.out.print(a + \" \");\n    int next = a + b;\n    a = b;\n    b = next;\n}"
    },
    "swap_two_numbers": {
        "prompts": ["swap two numbers", "swap variables without temp", "exchange values", "swap logic"],
        "python": "a = int(input())\nb = int(input())\na, b = b, a\nprint(a, b)",
        "cpp": "int a, b;\ncin >> a >> b;\na = a + b;\nb = a - b;\na = a - b;\ncout << a << \" \" << b;",
        "java": "int a = 5, b = 10;\na = a + b;\nb = a - b;\na = a - b;\nSystem.out.println(a + \" \" + b);"
    },
    "leap_year": {
        "prompts": ["check leap year", "is year leap", "leap year logic", "days in february year"],
        "python": "year = int(input())\nif (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0):\n    print('Leap Year')\nelse:\n    print('Not Leap Year')",
        "cpp": "int year;\ncin >> year;\nif ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0))\n    cout << \"Leap Year\";\nelse\n    cout << \"Not Leap Year\";",
        "java": "int year = 2024;\nif ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0))\n    System.out.println(\"Leap Year\");\nelse\n    System.out.println(\"Not Leap Year\");"
    },
    "odd_even": {
        "prompts": ["check odd even", "is number divisible by 2", "find even number", "odd number logic"],
        "python": "num = int(input())\nif num % 2 == 0: print('Even')\nelse: print('Odd')",
        "cpp": "int n; cin >> n;\nif(n % 2 == 0) cout << \"Even\";\nelse cout << \"Odd\";",
        "java": "int n = 5;\nif(n % 2 == 0) System.out.println(\"Even\");\nelse System.out.println(\"Odd\");"
    },
    "lcm_hcf": {
        "prompts": ["lcm and hcf", "gcd of two numbers", "least common multiple", "highest common factor"],
        "python": "import math\na, b = 12, 15\ngcd = math.gcd(a, b)\nlcm = (a*b)//gcd\nprint('HCF:', gcd, 'LCM:', lcm)",
        # endl after the HCF value keeps the two results from running together.
        "cpp": "int gcd(int a, int b) { return b==0?a:gcd(b, a%b); }\nint main() {\n    int a=12, b=15;\n    cout<<\"HCF: \"<<gcd(a,b)<<endl;\n    cout<<\"LCM: \"<<(a*b)/gcd(a,b);\n}",
        "java": "static int gcd(int a, int b) { return b==0?a:gcd(b, a%b); }\npublic static void main(String[] args) {\n    int a=12, b=15;\n    System.out.println(\"HCF: \"+gcd(a,b));\n    System.out.println(\"LCM: \"+(a*b)/gcd(a,b));\n}"
    },

    # ---------------------------
    # NUMBER THEORY
    # ---------------------------
    "prime_check": {
        "prompts": ["check prime number", "is prime or not", "prime no program", "find if number is prime"],
        # The inner "else" belongs to the for loop (for/else): it runs only
        # when no divisor triggered the break.
        "python": "num = int(input())\nif num > 1:\n    for i in range(2, int(num**0.5)+1):\n        if (num % i) == 0: print('Not Prime'); break\n    else: print('Prime')\nelse: print('Not Prime')",
        "cpp": "bool isPrime(int n) {\n    if (n <= 1) return false;\n    for (int i = 2; i * i <= n; i++)\n        if (n % i == 0) return false;\n    return true;\n}",
        "java": "boolean isPrime(int n) {\n    if (n <= 1) return false;\n    for (int i = 2; i * i <= n; i++)\n        if (n % i == 0) return false;\n    return true;\n}"
    },
    "armstrong": {
        "prompts": ["armstrong number", "sum of cubes of digits", "check armstrong", "narcissistic number"],
        "python": "n = int(input())\nsum = 0\ntemp = n\nwhile temp > 0:\n    digit = temp % 10\n    sum += digit ** 3\n    temp //= 10\nif n == sum: print('Armstrong')\nelse: print('Not Armstrong')",
        "cpp": "int n, r, sum=0, temp;\ncin >> n;\ntemp = n;\nwhile(n>0){r=n%10;sum=sum+(r*r*r);n=n/10;}\nif(temp==sum) cout<<\"Armstrong\";\nelse cout<<\"Not\";",
        "java": "int n=153, r, sum=0, temp;\ntemp = n;\nwhile(n>0){r=n%10;sum=sum+(r*r*r);n=n/10;}\nif(temp==sum) System.out.println(\"Armstrong\");\nelse System.out.println(\"Not\");"
    },
    "palindrome_number": {
        "prompts": ["palindrome number", "reverse number equal", "check number palindrome"],
        "python": "n = input()\nif n == n[::-1]: print('Palindrome')\nelse: print('Not Palindrome')",
        "cpp": "int n, r, sum=0, temp;\ncin >> n;\ntemp = n;\nwhile(n>0){r=n%10;sum=(sum*10)+r;n=n/10;}\nif(temp==sum) cout<<\"Palindrome\";\nelse cout<<\"Not\";",
        "java": "int n=121, r, sum=0, temp;\ntemp = n;\nwhile(n>0){r=n%10;sum=(sum*10)+r;n=n/10;}\nif(temp==sum) System.out.println(\"Palindrome\");\nelse System.out.println(\"Not\");"
    },
    "sum_of_digits": {
        "prompts": ["sum of digits", "add all digits of number", "digit sum logic"],
        "python": "n = int(input())\ns = 0\nwhile n > 0:\n    s += n % 10\n    n //= 10\nprint(s)",
        "cpp": "int n, sum=0;\ncin >> n;\nwhile(n>0) { sum += n%10; n/=10; }\ncout << sum;",
        "java": "int n=123, sum=0;\nwhile(n>0) { sum += n%10; n/=10; }\nSystem.out.println(sum);"
    },
    "decimal_to_binary": {
        "prompts": ["decimal to binary", "convert dec to bin", "binary of number"],
        "python": "n = int(input())\nprint(bin(n).replace('0b', ''))",
        "cpp": "void decToBinary(int n) {\n    int binaryNum[32];\n    int i = 0;\n    while (n > 0) {\n        binaryNum[i] = n % 2;\n        n = n / 2;\n        i++;\n    }\n    for (int j = i - 1; j >= 0; j--) cout << binaryNum[j];\n}",
        "java": "void decToBinary(int n) {\n    System.out.println(Integer.toBinaryString(n));\n}"
    },

    # ---------------------------
    # ARRAYS & MATRICES
    # ---------------------------
    "bubble_sort": {
        "prompts": ["bubble sort", "sort array ascending", "sorting algorithm", "arrange elements"],
        "python": "arr = [64, 34, 25, 12, 22, 11, 90]\nn = len(arr)\nfor i in range(n):\n    for j in range(0, n-i-1):\n        if arr[j] > arr[j+1]: arr[j], arr[j+1] = arr[j+1], arr[j]\nprint(arr)",
        "cpp": "void bubbleSort(int arr[], int n) {\n    for (int i = 0; i < n-1; i++)\n        for (int j = 0; j < n-i-1; j++)\n            if (arr[j] > arr[j+1]) swap(arr[j], arr[j+1]);\n}",
        "java": "void bubbleSort(int arr[]) {\n    int n = arr.length;\n    for (int i = 0; i < n-1; i++)\n        for (int j = 0; j < n-i-1; j++)\n            if (arr[j] > arr[j+1]) {\n                int temp = arr[j]; arr[j] = arr[j+1]; arr[j+1] = temp;\n            }\n}"
    },
    "linear_search": {
        "prompts": ["linear search", "find element in array", "search number list"],
        "python": "arr = [10, 20, 30, 40]\nx = 30\nif x in arr: print('Found')\nelse: print('Not Found')",
        "cpp": "int search(int arr[], int n, int x) {\n    for (int i = 0; i < n; i++)\n        if (arr[i] == x) return i;\n    return -1;\n}",
        "java": "int search(int arr[], int x) {\n    for (int i = 0; i < arr.length; i++)\n        if (arr[i] == x) return i;\n    return -1;\n}"
    },
    "largest_in_array": {
        "prompts": ["largest element in array", "max in array", "find biggest number in list"],
        "python": "arr = [10, 324, 45, 90, 9808]\nprint(max(arr))",
        "cpp": "int largest(int arr[], int n) {\n    int max = arr[0];\n    for (int i = 1; i < n; i++)\n        if (arr[i] > max) max = arr[i];\n    return max;\n}",
        "java": "int largest(int arr[]) {\n    int max = arr[0];\n    for (int i = 1; i < arr.length; i++)\n        if (arr[i] > max) max = arr[i];\n    return max;\n}"
    },
    "matrix_add": {
        "prompts": ["matrix addition", "add two matrices", "sum of matrix"],
        "python": "X = [[1,2,3], [4 ,5,6], [7 ,8,9]]\nY = [[9,8,7], [6,5,4], [3,2,1]]\nresult = [[X[i][j] + Y[i][j] for j in range(len(X[0]))] for i in range(len(X))]\nfor r in result: print(r)",
        "cpp": "void addMatrix(int A[3][3], int B[3][3]) {\n    for(int i=0;i<3;i++) {\n        for(int j=0;j<3;j++) cout<<A[i][j]+B[i][j]<<\" \";\n        cout<<endl;\n    }\n}",
        "java": "void addMatrix(int A[][], int B[][]) {\n    for(int i=0;i<3;i++) {\n        for(int j=0;j<3;j++) System.out.print(A[i][j]+B[i][j]+\" \");\n        System.out.println();\n    }\n}"
    },
    "matrix_transpose": {
        "prompts": ["matrix transpose", "transpose of matrix", "swap rows and columns"],
        "python": "X = [[12,7], [4 ,5], [3 ,8]]\nresult = [[X[j][i] for j in range(len(X))] for i in range(len(X[0]))]\nfor r in result: print(r)",
        "cpp": "void transpose(int A[3][3]) {\n    for(int i=0;i<3;i++) {\n        for(int j=0;j<3;j++) cout<<A[j][i]<<\" \";\n        cout<<endl;\n    }\n}",
        "java": "void transpose(int A[][]) {\n    for(int i=0;i<3;i++) {\n        for(int j=0;j<3;j++) System.out.print(A[j][i]+\" \");\n        System.out.println();\n    }\n}"
    },

    # ---------------------------
    # STRINGS
    # ---------------------------
    "string_palindrome": {
        "prompts": ["string palindrome", "check word palindrome", "reverse string equal"],
        "python": "s = input()\nif s == s[::-1]: print('Palindrome')\nelse: print('Not Palindrome')",
        "cpp": "string s; cin >> s;\nstring rev = string(s.rbegin(), s.rend());\nif (s == rev) cout << \"Palindrome\";\nelse cout << \"Not\";",
        # The else branch reports the negative case, matching the C++ template.
        "java": "String str = \"madam\", rev = \"\";\nfor (int i = str.length() - 1; i >= 0; i--) rev = rev + str.charAt(i);\nif (str.equals(rev)) System.out.println(\"Palindrome\");\nelse System.out.println(\"Not\");"
    },
    "vowel_count": {
        "prompts": ["count vowels", "number of vowels in string", "vowel consonant count"],
        "python": "s = input().lower()\ncount = 0\nfor char in s:\n    if char in 'aeiou': count += 1\nprint(count)",
        "cpp": "string s; cin >> s;\nint count = 0;\nfor(char c : s) {\n    if(c=='a'||c=='e'||c=='i'||c=='o'||c=='u') count++;\n}\ncout << count;",
        # Prints the result, like the Python and C++ templates do.
        "java": "String s = \"hello\";\nint count = 0;\nfor(int i=0; i<s.length(); i++) {\n    char c = s.charAt(i);\n    if(c=='a'||c=='e'||c=='i'||c=='o'||c=='u') count++;\n}\nSystem.out.println(count);"
    },

    # ---------------------------
    # PATTERNS (EXAM FAVORITES)
    # ---------------------------
    "star_pyramid": {
        "prompts": ["star pyramid", "triangle star pattern", "print pyramid"],
        "python": "n = 5\nfor i in range(n):\n    print(' '*(n-i-1) + '* '*(i+1))",
        "cpp": "int n=5;\nfor(int i=1; i<=n; i++) {\n    for(int j=1; j<=n-i; j++) cout<<\" \";\n    for(int j=1; j<=i; j++) cout<<\"* \";\n    cout<<endl;\n}",
        "java": "int n=5;\nfor(int i=1; i<=n; i++) {\n    for(int j=1; j<=n-i; j++) System.out.print(\" \");\n    for(int j=1; j<=i; j++) System.out.print(\"* \");\n    System.out.println();\n}"
    },
    "right_triangle": {
        "prompts": ["right angle triangle", "star pattern right", "simple star pattern"],
        "python": "n=5\nfor i in range(1, n+1):\n    print('* ' * i)",
        "cpp": "for(int i=1; i<=5; i++){\n    for(int j=1; j<=i; j++) cout<<\"* \";\n    cout<<endl;\n}",
        "java": "for(int i=1; i<=5; i++){\n    for(int j=1; j<=i; j++) System.out.print(\"* \");\n    System.out.println();\n}"
    },

    # ---------------------------
    # UTILITY
    # ---------------------------
    "calculator": {
        "prompts": ["simple calculator", "add sub mul div", "switch case calculator", "calc program"],
        "python": "def calc(a, b, op):\n    if op == '+': return a + b\n    elif op == '-': return a - b\n    elif op == '*': return a * b\n    elif op == '/': return a / b",
        "cpp": "switch(op) {\n    case '+': cout << a+b; break;\n    case '-': cout << a-b; break;\n    case '*': cout << a*b; break;\n    case '/': cout << a/b; break;\n}",
        "java": "switch(op) {\n    case '+': System.out.println(a+b); break;\n    case '-': System.out.println(a-b); break;\n    case '*': System.out.println(a*b); break;\n    case '/': System.out.println(a/b); break;\n}"
    },
    "hello_world": {
        "prompts": ["hello world", "print hello", "basic program", "test code"],
        "python": "print(\"Hello World\")",
        "cpp": "#include <iostream>\nusing namespace std;\nint main() {\n    cout << \"Hello World\";\n    return 0;\n}",
        "java": "public class Main {\n    public static void main(String[] args) {\n        System.out.println(\"Hello World\");\n    }\n}"
    }
}
170
+
171
+ # ============================
172
+ # 2. GENERATOR LOGIC
173
+ # ============================
174
def generate_dataset(output_file=None, samples_per_algo=None, seed=None, algorithms=None):
    """Sample (prompt, language, code) rows and write them to a CSV file.

    For every algorithm entry, ``samples_per_algo`` rows are drawn. Each row
    picks a random base prompt, a random language out of python/cpp/java and
    a random phrasing that combines the two, then pairs the resulting query
    with that algorithm's code template for the chosen language.

    Calling the function without arguments keeps the original behaviour:
    module-level ``OUTPUT_FILE``, ``SAMPLES_PER_ALGO`` and ``ALGORITHMS`` are
    used and randomness comes from the global ``random`` module state.

    Args:
        output_file: Path of the CSV to write. Defaults to ``OUTPUT_FILE``.
        samples_per_algo: Rows generated per algorithm. Defaults to
            ``SAMPLES_PER_ALGO``.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the output is reproducible and the global random state is
            left untouched.
        algorithms: Mapping of algorithm key to a template dict holding
            ``"prompts"`` plus one snippet per language. Defaults to
            ``ALGORITHMS``.

    Returns:
        None. The rows are written to ``output_file`` (overwriting it) with
        the header ``prompt,language,code``.
    """
    # Resolve defaults at call time so the module constants can be changed
    # after import and still be honoured.
    if output_file is None:
        output_file = OUTPUT_FILE
    if samples_per_algo is None:
        samples_per_algo = SAMPLES_PER_ALGO
    if algorithms is None:
        algorithms = ALGORITHMS

    # Without a seed, fall back to the module-level generator so callers that
    # seeded ``random`` themselves keep getting the same sequence as before.
    rng = random if seed is None else random.Random(seed)
    languages = ("python", "cpp", "java")

    print("Generating THE ULTIMATE Rosetta Stone Dataset...")
    data = []

    for templates in algorithms.values():
        base_prompts = templates["prompts"]

        for _ in range(samples_per_algo):
            # 1. Randomize prompt and target language
            prompt_base = rng.choice(base_prompts)
            lang = rng.choice(languages)

            # 2. Create natural language variation
            variations = (
                f"{prompt_base} in {lang}",
                f"write {lang} code for {prompt_base}",
                f"how to {prompt_base} using {lang}",
                f"program for {prompt_base} in {lang}",
                f"give me {prompt_base} code {lang}",
            )
            query = rng.choice(variations)

            # 3. Pair the query with the template for the chosen language
            data.append([query, lang, templates[lang]])

    # Save; newline='' lets the csv module control line endings so the
    # multi-line code cells are quoted and written correctly.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["prompt", "language", "code"])
        writer.writerows(data)

    print(f"✅ Created {len(data)} training samples covering {len(algorithms)} major topics.")
    print(f"Saved to {output_file}")
209
+
210
# Script entry point: build the dataset only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    generate_dataset()