Skip to content
13 changes: 11 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
# SynapseServerlessImporter
Sample scripts and an ARM template to generically import files from flat-file or SQL sources into Synapse Serverless.
# Summary
This repo deploys a Synapse workspace plus a storage account. It can be used as an accelerator or as a demo to show the value of Synapse Serverless external tables and how they can be dynamically created on top of any file within storage. Right now, the solution demonstrates loading files from an HTTP source and then dynamically creating the external table within a Serverless SQL pool for querying or initial transformations downstream. In the future, we are considering adding sample scripts that demonstrate a similar approach for:

- flat files from an on-prem data source
- SQL Server from an on-prem data source

# Deployment
The simplest way to deploy this is to use the "Deploy to Azure" button below. No kidding! Just plug in some parameters, and the solution does the rest. It includes a sample list of 50 files from the now-famous taxi cab dataset that the NYC Taxi and Limousine Commission makes available here: <https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page>


[![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fchristophermschmidt%2FSynapseServerlessImporter%2Fdeploy%2Fdeploy%2FDeploy.json)
265 changes: 265 additions & 0 deletions deploy/Deploy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
{
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"resourcesBasePrefix": {
"type": "string",
"maxLength": 8,
"defaultValue": "ssi",
"metadata": {
"description": "The prefix to use for resource names. Use only lowercase letters and numbers."
}
},
"storageSKU": {
"type": "string",
"allowedValues": [
"Standard_LRS",
"Standard_ZRS",
"Standard_GRS",
"Standard_RAGRS",
"Premium_LRS"
],
"defaultValue": "Standard_RAGRS",
"metadata": {
"description": "The type of replication to use for the storage account."
}
},
"sqlUsername": {
"type": "string",
"maxLength": 128,
"defaultValue": "sqladminuser",
"metadata": {
"description": "The SQL administrator username for the Synapse workspace."
}
},
"sqlPassword": {
"type": "securestring",
"maxLength": 128,
"metadata": {
"description": "The SQL administrator password for the Synapse workspace."
}
},
"akvUser": {
"type": "string",
"metadata": {
"description": "Specifies the object ID of a user, service principal or security group in the Azure Active Directory tenant for the vault. The object ID must be unique for the list of access policies. Get it by using Get-AzADUser or Get-AzADServicePrincipal cmdlets."
}
},
"storageRoleUniqueId": {
"defaultValue": "[newGuid()]",
"type": "String"
}
},
"variables": {
"storageName": "[concat(toLower(parameters('resourcesBasePrefix')),'storage', substring(uniqueString(resourceGroup().id),0,5))]",
"synapseName": "[concat(toLower(parameters('resourcesBasePrefix')),'synapse', substring(uniqueString(resourceGroup().id),0,5))]",
"keyvaultName": "[concat(toLower(parameters('resourcesBasePrefix')),'akv', substring(uniqueString(resourceGroup().id),0,5))]",
"storageBlobDataContributorRoleID": "ba92f5b4-2d11-453d-a403-e96b0029c9fe"
},
"resources": [
{
"type": "Microsoft.Storage/storageAccounts",
"apiVersion": "2020-08-01-preview",
"name": "[variables('storageName')]",
"location": "[resourceGroup().location]",
"sku": {
"name": "[parameters('storageSKU')]"
},
"kind": "StorageV2",
"properties": {
"isHnsEnabled": true,
"networkAcls": {
"bypass": "AzureServices",
"virtualNetworkRules": [],
"ipRules": [],
"defaultAction": "Allow"
},
"supportsHttpsTrafficOnly": true,
"encryption": {
"services": {
"file": {
"keyType": "Account",
"enabled": true
},
"blob": {
"keyType": "Account",
"enabled": true
}
},
"keySource": "Microsoft.Storage"
},
"accessTier": "Hot"
},
"resources": [
{
"type": "blobServices/containers",
"apiVersion": "2019-06-01",
"name": "default/synapseadls",
"dependsOn": [
"[variables('storageName')]"
],
"properties": {
"publicAccess": "None"
}
}
]
},
{
"type": "Microsoft.Synapse/workspaces",
"apiVersion": "2019-06-01-preview",
"name": "[variables('synapseName')]",
"location": "[resourceGroup().location]",
"identity": {
"type": "SystemAssigned"
},
"properties": {
"connectivityEndpoints": {
"web": "[concat('https://web.azuresynapse.net?workspace=%2fsubscriptions%2f',subscription().subscriptionId,'%2fresourceGroups%2f',resourceGroup().name,'%2fproviders%2fMicrosoft.Synapse%2fworkspaces%2f', variables('synapseName'))]",
"dev": "[concat('https://', variables('synapseName'), '.dev.azuresynapse.net')]",
"sqlOnDemand": "[concat(variables('synapseName'), '-ondemand.sql.azuresynapse.net')]",
"sql": "[concat(variables('synapseName'), '.sql.azuresynapse.net')]"
},
"defaultDataLakeStorage": {
"accountUrl": "[concat('https://',variables('storageName'),'.dfs.core.windows.net')]",
"filesystem": "synapseadls"
},
"sqlAdministratorLogin": "[parameters('sqlUsername')]",
"sqlAdministratorLoginPassword": "[parameters('sqlPassword')]",
"privateEndpointConnections": [],
"encryption": {}
}
},
{
"type": "Microsoft.Storage/storageAccounts/blobServices",
"apiVersion": "2020-08-01-preview",
"name": "[concat(variables('storageName'), '/default')]",
"dependsOn": [
"[resourceId('Microsoft.Storage/storageAccounts', variables('storageName'))]"
],
"sku": {
"name": "[parameters('storageSKU')]"
},
"properties": {
"cors": {
"corsRules": []
},
"deleteRetentionPolicy": {
"enabled": false
}
}
},
{
"type": "Microsoft.Synapse/workspaces/firewallRules",
"apiVersion": "2019-06-01-preview",
"name": "[concat(variables('synapseName'), '/allowAll')]",
"dependsOn": [
"[resourceId('Microsoft.Synapse/workspaces', variables('synapseName'))]"
],
"properties": {
"startIpAddress": "0.0.0.0",
"endIpAddress": "255.255.255.255"
}
},
{
"type": "Microsoft.KeyVault/vaults",
"apiVersion": "2018-02-14",
"name": "[variables('keyvaultName')]",
"location": "[resourceGroup().location]",
"dependsOn": [],
"tags": {},
"properties": {
"enabledForDeployment": false,
"enabledForTemplateDeployment": true,
"enabledForDiskEncryption": false,
"enableRbacAuthorization": false,
"accessPolicies": [
{
"tenantId": "[subscription().tenantId]",
"objectId": "[parameters('akvUser')]",
"permissions": {
"keys": [
"get",
"list",
"update",
"create",
"import",
"delete",
"recover",
"backup",
"restore"
],
"secrets": [
"get",
"set",
"list",
"delete",
"recover",
"backup",
"restore"
]
}
}
],
"tenantId": "[subscription().tenantId]",
"sku": {
"name": "standard",
"family": "A"
},
"enableSoftDelete": true,
"softDeleteRetentionInDays": "90",
"networkAcls": {
"defaultAction": "allow",
"bypass": "AzureServices",
"ipRules": [],
"virtualNetworkRules": []
}
}
},
{
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"name": "storageRoleDeploymentResource",
"dependsOn": [
"[concat('Microsoft.Synapse/workspaces/', variables('synapseName'))]"
],
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {},
"variables": {},
"resources": [
{
"type": "Microsoft.Storage/storageAccounts/providers/roleAssignments",
"apiVersion": "2018-09-01-preview",
"name": "[concat(variables('storageName'), '/Microsoft.Authorization/', guid(concat(resourceGroup().id, '/', variables('storageBlobDataContributorRoleID'), '/', variables('synapseName'), '/', parameters('storageRoleUniqueId'))))]",
"location": "[resourceGroup().location]",
"properties": {
"roleDefinitionId": "[resourceId('Microsoft.Authorization/roleDefinitions', variables('storageBlobDataContributorRoleID'))]",
"principalId": "[reference(concat('Microsoft.Synapse/workspaces/', variables('synapseName')), '2019-06-01-preview', 'Full').identity.principalId]",
"principalType": "ServicePrincipal"
}
}
]
}
},
"subscriptionId": "[subscription().subscriptionId]",
"resourceGroup": "[resourceGroup().name]"
}
],
"outputs": {
"synapseName": {
"type": "string",
"value": "[variables('synapseName')]"
},
"storageName": {
"type": "string",
"value": "[variables('storageName')]"
},
"keyvaultName": {
"type": "string",
"value": "[variables('keyvaultName')]"
}
}
}
Loading