Skip to content

Commit f0394fb

Browse files
author
bitoollearner
committed
LeetCode Pyspark Questions Solution
LeetCode Pyspark Questions Solution
1 parent 375fcf8 commit f0394fb

11 files changed

+1101
-88
lines changed

Solved/1070. Product Sales Analysis III (Medium)-(Solved).ipynb

Lines changed: 80 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "3f2ab884-5b2e-4c2a-b90f-eba6d7208edf",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "26c9d69f-b471-43b4-aaee-40c41d01994e",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "fa1c2280-e46c-403e-8996-25b01a84ec72",
4655
"showTitle": false,
@@ -76,7 +85,7 @@
7685
"product_id is the primary key (column with unique values) of this table.\n",
7786
"Each row of this table indicates the product name of each product.\n",
7887
" \n",
79-
"**Write a solution to select the product id, year, quantity, and price for the first year of every product sold.**\n",
88+
"**Write a solution to select the product name, year, quantity, and price for the first year of every product sold.**\n",
8089
"\n",
8190
"Return the resulting table in any order.\n",
8291
"\n",
@@ -100,18 +109,21 @@
100109
"| 300 | Samsung |\n",
101110
"\n",
102111
"**Output:**\n",
103-
"| product_id | first_year | quantity | price |\n",
112+
"| product_name | first_year | quantity | price |\n",
104113
"|------------|------------|----------|-------| \n",
105-
"| 100 | 2008 | 10 | 5000 |\n",
106-
"| 200 | 2011 | 15 | 9000 |\n"
114+
"| Nokia | 2008 | 10 | 5000 |\n",
115+
"| Apple | 2011 | 15 | 9000 |\n"
107116
]
108117
},
109118
{
110119
"cell_type": "code",
111120
"execution_count": 0,
112121
"metadata": {
113122
"application/vnd.databricks.v1+cell": {
114-
"cellMetadata": {},
123+
"cellMetadata": {
124+
"byteLimit": 2048000,
125+
"rowLimit": 10000
126+
},
115127
"inputWidgets": {},
116128
"nuid": "dddf2382-d3f5-49ba-9108-34f298edfb81",
117129
"showTitle": false,
@@ -141,25 +153,81 @@
141153
"product_df_1070 = spark.createDataFrame(product_data_1070, product_columns_1070)\n",
142154
"product_df_1070.show()"
143155
]
156+
},
157+
{
158+
"cell_type": "code",
159+
"execution_count": 0,
160+
"metadata": {
161+
"application/vnd.databricks.v1+cell": {
162+
"cellMetadata": {
163+
"byteLimit": 2048000,
164+
"rowLimit": 10000
165+
},
166+
"inputWidgets": {},
167+
"nuid": "d5132975-4102-42a4-bba0-1f72af2fed4a",
168+
"showTitle": false,
169+
"tableResultSettingsMap": {},
170+
"title": ""
171+
}
172+
},
173+
"outputs": [],
174+
"source": [
175+
"from pyspark.sql.functions import rank\n",
"\n",
"# Rank each product's sales by year. rank() assigns rn == 1 to every sale made in the\n",
"# product's earliest year; row_number() would keep only one arbitrary row when a product\n",
"# has several sales in that year.\n",
"windowSpec = Window.partitionBy(\"product_id\").orderBy(\"year\")\n",
176+
"\n",
177+
"sales_with_rank_df_1070 = sales_df_1070.withColumn(\"rn\", rank().over(windowSpec))"
178+
]
179+
},
180+
{
181+
"cell_type": "code",
182+
"execution_count": 0,
183+
"metadata": {
184+
"application/vnd.databricks.v1+cell": {
185+
"cellMetadata": {
186+
"byteLimit": 2048000,
187+
"rowLimit": 10000
188+
},
189+
"inputWidgets": {},
190+
"nuid": "4d6c4f21-1b67-4b5b-a63a-b8fb100f0175",
191+
"showTitle": false,
192+
"tableResultSettingsMap": {},
193+
"title": ""
194+
}
195+
},
196+
"outputs": [],
197+
"source": [
198+
"# Join the ranked sales to the product table, keep the rn == 1 rows, and show the\n",
"# product name, the sale year (as first_year), quantity and price.\n",
"(\n",
"    sales_with_rank_df_1070\n",
199+
"    .join(product_df_1070, on=\"product_id\", how=\"inner\")\n",
200+
"    .filter(col(\"rn\") == 1)\n",
201+
"    .select(\n",
"        \"product_name\",\n",
"        col(\"year\").alias(\"first_year\"),\n",
"        \"quantity\",\n",
"        \"price\",\n",
"    )\n",
"    .show()\n",
")"
202+
]
144203
}
145204
],
146205
"metadata": {
147206
"application/vnd.databricks.v1+notebook": {
148-
"computePreferences": null,
207+
"computePreferences": {
208+
"hardware": {
209+
"accelerator": null,
210+
"gpuPoolId": null,
211+
"memory": null
212+
}
213+
},
149214
"dashboards": [],
150215
"environmentMetadata": {
151216
"base_environment": "",
152-
"environment_version": "1"
217+
"environment_version": "2"
153218
},
154219
"inputWidgetPreferences": null,
155220
"language": "python",
156221
"notebookMetadata": {
157222
"pythonIndentUnit": 4
158223
},
159-
"notebookName": "1070. Product Sales Analysis III (Medium)",
224+
"notebookName": "1070. Product Sales Analysis III (Medium)-(Solved)",
160225
"widgets": {}
226+
},
227+
"language_info": {
228+
"name": "python"
161229
}
162230
},
163231
"nbformat": 4,
164232
"nbformat_minor": 0
165-
}
233+
}

Solved/1075. Project Employees I (Easy)-(Solved).ipynb

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "3f2ab884-5b2e-4c2a-b90f-eba6d7208edf",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "26c9d69f-b471-43b4-aaee-40c41d01994e",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "fa1c2280-e46c-403e-8996-25b01a84ec72",
4655
"showTitle": false,
@@ -114,7 +123,10 @@
114123
"execution_count": 0,
115124
"metadata": {
116125
"application/vnd.databricks.v1+cell": {
117-
"cellMetadata": {},
126+
"cellMetadata": {
127+
"byteLimit": 2048000,
128+
"rowLimit": 10000
129+
},
118130
"inputWidgets": {},
119131
"nuid": "dddf2382-d3f5-49ba-9108-34f298edfb81",
120132
"showTitle": false,
@@ -147,25 +159,58 @@
147159
"employee_df_1075 = spark.createDataFrame(employee_data_1075, employee_columns_1075)\n",
148160
"employee_df_1075.show()"
149161
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": 0,
166+
"metadata": {
167+
"application/vnd.databricks.v1+cell": {
168+
"cellMetadata": {
169+
"byteLimit": 2048000,
170+
"rowLimit": 10000
171+
},
172+
"inputWidgets": {},
173+
"nuid": "27c75734-7084-4511-ad4f-e6e5a769427e",
174+
"showTitle": false,
175+
"tableResultSettingsMap": {},
176+
"title": ""
177+
}
178+
},
179+
"outputs": [],
180+
"source": [
181+
"# Join projects to employees on employee_id, then show the average of experience_years\n",
"# per project_id, rounded to 2 decimal places, as average_years.\n",
"(\n",
"    project_df_1075\n",
182+
"    .join(employee_df_1075, on=\"employee_id\", how=\"inner\")\n",
183+
"    .groupBy(\"project_id\")\n",
184+
"    .agg(round(avg(\"experience_years\"), 2).alias(\"average_years\"))\n",
"    .show()\n",
")"
185+
]
150186
}
151187
],
152188
"metadata": {
153189
"application/vnd.databricks.v1+notebook": {
154-
"computePreferences": null,
190+
"computePreferences": {
191+
"hardware": {
192+
"accelerator": null,
193+
"gpuPoolId": null,
194+
"memory": null
195+
}
196+
},
155197
"dashboards": [],
156198
"environmentMetadata": {
157199
"base_environment": "",
158-
"environment_version": "1"
200+
"environment_version": "2"
159201
},
160202
"inputWidgetPreferences": null,
161203
"language": "python",
162204
"notebookMetadata": {
163205
"pythonIndentUnit": 4
164206
},
165-
"notebookName": "1075. Project Employees I (Easy)",
207+
"notebookName": "1075. Project Employees I (Easy)-(Solved)",
166208
"widgets": {}
209+
},
210+
"language_info": {
211+
"name": "python"
167212
}
168213
},
169214
"nbformat": 4,
170215
"nbformat_minor": 0
171-
}
216+
}

0 commit comments

Comments
 (0)