diff --git a/deployment/infra/OlToPurviewMappings.json b/deployment/infra/OlToPurviewMappings.json index ad30b9e..ac992e4 100644 --- a/deployment/infra/OlToPurviewMappings.json +++ b/deployment/infra/OlToPurviewMappings.json @@ -395,6 +395,19 @@ "qualifiedName": "mysql://{nameSpcBodyParts[0]}/{nameSpcBodyParts[2]}/{nameGroups[0]}", "purviewDataType": "azure_mysql_table", "purviewPrefix": "mysql" + }, + { + "name": "azureCosmos", + "parserConditions": [ + { + "op1": "prefix", + "compare": "=", + "op2": "azurecosmos" + } + ], + "qualifiedName": "https://{nameSpcBodyParts[0]}/{nameSpcBodyParts[1]}/{nameSpcBodyParts[2]}/{nameGroups[0]}", + "purviewDataType": "azure_cosmosdb_sqlapi_collection", + "purviewPrefix": "https" } ] } \ No newline at end of file diff --git a/deployment/infra/newdeploymenttemp.json b/deployment/infra/newdeploymenttemp.json index 074d98e..6ec57eb 100644 --- a/deployment/infra/newdeploymenttemp.json +++ b/deployment/infra/newdeploymenttemp.json @@ -190,7 +190,7 @@ }, { "name": "OlToPurviewMappings", - "value": 
"{\"olToPurviewMappings\":[{\"name\":\"wasbs\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"wasbs\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_blob_path\",\"purviewPrefix\":\"https\"},{\"name\":\"wasb\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"wasb\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_blob_path\",\"purviewPrefix\":\"https\"},{\"name\":\"abfsBlobRootFS\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfs\"},{\"op1\":\"nameSpcBodyParts[1]\",\"compare\":\"contains\",\"op2\":\"blob\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"=\",\"op2\":\"\"}],\"qualifiedName\":\"https://{nameSpcConParts[0]}.dfs.{nameSpcConParts[2]}.{nameSpcConParts[3]}.{nameSpcConParts[4]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_filesystem\",\"purviewPrefix\":\"https\"},{\"name\":\"abfsRootFS\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfs\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"=\",\"op2\":\"\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_filesystem\",\"purviewPrefix\":\"https\"},{\"name\":\"abfssBlobRootFS\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfss\"},{\"op1\":\"nameSpcBodyParts[1]\",\"compare\":\"contains\",\"op2\":\"blob\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"=\",\"op2\":\"\"}],\"qualifiedName\":\"https://{nameSpcConParts[0]}.dfs.{nameSpcConParts[2]}.{nameSpcConParts[3]}.{nameSpcConParts[4]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_filesystem\",\"purviewPrefix\":\"https\"},{\"name\":\"abfssRootFS\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfss\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"=\",\"
op2\":\"\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_filesystem\",\"purviewPrefix\":\"https\"},{\"name\":\"abfsBlob\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfs\"},{\"op1\":\"nameSpcBodyParts[1]\",\"compare\":\"contains\",\"op2\":\"blob\"}],\"qualifiedName\":\"https://{nameSpcConParts[0]}.dfs.{nameSpcConParts[2]}.{nameSpcConParts[3]}.{nameSpcConParts[4]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_path\",\"purviewPrefix\":\"https\"},{\"name\":\"abfs\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfs\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_path\",\"purviewPrefix\":\"https\"},{\"name\":\"abfssBlob\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfss\"},{\"op1\":\"nameSpcBodyParts[1]\",\"compare\":\"contains\",\"op2\":\"blob\"}],\"qualifiedName\":\"https://{nameSpcConParts[0]}.dfs.{nameSpcConParts[2]}.{nameSpcConParts[3]}.{nameSpcConParts[4]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_path\",\"purviewPrefix\":\"https\"},{\"name\":\"abfss\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfss\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_path\",\"purviewPrefix\":\"https\"},{\"name\":\"synapseSqlNonDbo\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"},{\"op1\":\"nameSpcBodyParts[0]\",\"compare\":\"contains\",\"op2\":\"azuresynapse\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",\"op2\":\"1\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/{nameGroups[0].parts[0]}/{nameGroups[0].parts[1]}\",\"purviewDataType\":\"azure_synapse_dedicated_sql_table\
",\"purviewPrefix\":\"mssql\"},{\"name\":\"synapseSql\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"},{\"op1\":\"nameSpcBodyParts[0]\",\"compare\":\"contains\",\"op2\":\"azuresynapse\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/dbo/{nameGroups[0].parts[0]}\",\"purviewDataType\":\"azure_synapse_dedicated_sql_table\",\"purviewPrefix\":\"mssql\"},{\"name\":\"azureSQLNonDbo\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"},{\"op1\":\"nameGroups\",\"compare\":\">\",\"op2\":\"1\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/{nameGroups[0]}/{nameGroups[1]}\",\"purviewDataType\":\"azure_sql_table\",\"purviewPrefix\":\"mssql\"},{\"name\":\"azureSQLNonDboNoDotsInNames\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",\"op2\":\"1\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/{nameGroups[0].parts[0]}/{nameGroups[0].parts[1]}\",\"purviewDataType\":\"azure_sql_table\",\"purviewPrefix\":\"mssql\"},{\"name\":\"azureSQL\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/dbo/{nameGroups[0]}\",\"purviewDataType\":\"azure_sql_table\",\"purviewPrefix\":\"mssql\"},{\"name\":\"azurePostgresNonPublic\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"postgresql\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",\"op2\":\"1\"},{\"op1\":\"nameSpcConParts\",\"compare\":\">\",\"op2\":\"4\"},{\"op1\":\"nameSpcConParts[3]\",\"compare\":\"=\",\"op2\":\"azure\"}],\"qualifiedName\":\"postgresql://{nameSpcBodyParts[0]}/{nameSpcBodyParts[2]}/{nameGroups[0].parts[0]}/{nameGroups[0].parts[1]}\",\"purviewDataType\":\"azure_postgresql_table\",\"purviewPrefix\":\"postgresql\"},{\"name\":\"azurePostgres
\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"postgresql\"},{\"op1\":\"nameSpcConParts\",\"compare\":\">\",\"op2\":\"4\"},{\"op1\":\"nameSpcConParts[3]\",\"compare\":\"=\",\"op2\":\"azure\"}],\"qualifiedName\":\"postgresql://{nameSpcBodyParts[0]}/{nameSpcBodyParts[2]}/public/{nameGroups[0]}\",\"purviewDataType\":\"azure_postgresql_table\",\"purviewPrefix\":\"postgresql\"},{\"name\":\"postgresNonPublic\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"postgresql\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",\"op2\":\"1\"}],\"qualifiedName\":\"postgresql://servers/{nameSpcBodyParts[0]}:{nameSpcBodyParts[1]}/dbs/{nameSpcBodyParts[2]}/schemas/{nameGroups[0].parts[0]}/tables/{nameGroups[0].parts[1]}\",\"purviewDataType\":\"postgresql_table\",\"purviewPrefix\":\"postgresql\"},{\"name\":\"postgres\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"postgresql\"}],\"qualifiedName\":\"postgresql://servers/{nameSpcBodyParts[0]}:{nameSpcBodyParts[1]}/dbs/{nameSpcBodyParts[2]}/schemas/public/tables/{nameGroups[0]}\",\"purviewDataType\":\"postgresql_table\",\"purviewPrefix\":\"postgresql\"},{\"name\":\"hiveManagedTableNotDefault\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"dbfs\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"contains\",\"op2\":\"hive/warehouse\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",\"op2\":\"4\"}],\"qualifiedName\":\"{nameGroups[0].parts[3]}.{nameGroups[0].parts[5]}@{AdbWorkspaceUrl}\",\"purviewDataType\":\"hive_table\",\"purviewPrefix\":\"hive\"},{\"name\":\"hiveManagedTableDefault\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"dbfs\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"contains\",\"op2\":\"hive/warehouse\"}],\"qualifiedName\":\"default.{nameGroups[0].parts[3]}@{AdbWorkspaceUrl}\",\"purviewDataType\":\"hive_table\",\"purviewPrefix\":\"hive\"},{\"name\":\"azureMySql\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare
\":\"=\",\"op2\":\"mysql\"}],\"qualifiedName\":\"mysql://{nameSpcBodyParts[0]}/{nameSpcBodyParts[2]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_mysql_table\",\"purviewPrefix\":\"mysql\"}]}" + "value": "{\"olToPurviewMappings\":[{\"name\":\"wasbs\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"wasbs\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_blob_path\",\"purviewPrefix\":\"https\"},{\"name\":\"wasb\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"wasb\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_blob_path\",\"purviewPrefix\":\"https\"},{\"name\":\"abfsBlobRootFS\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfs\"},{\"op1\":\"nameSpcBodyParts[1]\",\"compare\":\"contains\",\"op2\":\"blob\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"=\",\"op2\":\"\"}],\"qualifiedName\":\"https://{nameSpcConParts[0]}.dfs.{nameSpcConParts[2]}.{nameSpcConParts[3]}.{nameSpcConParts[4]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_filesystem\",\"purviewPrefix\":\"https\"},{\"name\":\"abfsRootFS\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfs\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"=\",\"op2\":\"\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_filesystem\",\"purviewPrefix\":\"https\"},{\"name\":\"abfssBlobRootFS\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfss\"},{\"op1\":\"nameSpcBodyParts[1]\",\"compare\":\"contains\",\"op2\":\"blob\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"=\",\"op2\":\"\"}],\"qualifiedName\":\"https://{nameSpcConParts[0]}.dfs.{nameSpcConParts[2]}.{nameSpcConParts[3]}.{nameSpcConParts[4]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_data
lake_gen2_filesystem\",\"purviewPrefix\":\"https\"},{\"name\":\"abfssRootFS\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfss\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"=\",\"op2\":\"\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_filesystem\",\"purviewPrefix\":\"https\"},{\"name\":\"abfsBlob\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfs\"},{\"op1\":\"nameSpcBodyParts[1]\",\"compare\":\"contains\",\"op2\":\"blob\"}],\"qualifiedName\":\"https://{nameSpcConParts[0]}.dfs.{nameSpcConParts[2]}.{nameSpcConParts[3]}.{nameSpcConParts[4]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_path\",\"purviewPrefix\":\"https\"},{\"name\":\"abfs\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfs\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_path\",\"purviewPrefix\":\"https\"},{\"name\":\"abfssBlob\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfss\"},{\"op1\":\"nameSpcBodyParts[1]\",\"compare\":\"contains\",\"op2\":\"blob\"}],\"qualifiedName\":\"https://{nameSpcConParts[0]}.dfs.{nameSpcConParts[2]}.{nameSpcConParts[3]}.{nameSpcConParts[4]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_path\",\"purviewPrefix\":\"https\"},{\"name\":\"abfss\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"abfss\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[1]}/{nameSpcBodyParts[0]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_datalake_gen2_path\",\"purviewPrefix\":\"https\"},{\"name\":\"synapseSqlNonDbo\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"},{\"op1\":\"nameSpcBodyParts[0]\",\"compare\":\"contains\",\"op2\":\"azuresynapse\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",
\"op2\":\"1\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/{nameGroups[0].parts[0]}/{nameGroups[0].parts[1]}\",\"purviewDataType\":\"azure_synapse_dedicated_sql_table\",\"purviewPrefix\":\"mssql\"},{\"name\":\"synapseSql\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"},{\"op1\":\"nameSpcBodyParts[0]\",\"compare\":\"contains\",\"op2\":\"azuresynapse\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/dbo/{nameGroups[0].parts[0]}\",\"purviewDataType\":\"azure_synapse_dedicated_sql_table\",\"purviewPrefix\":\"mssql\"},{\"name\":\"azureSQLNonDbo\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"},{\"op1\":\"nameGroups\",\"compare\":\">\",\"op2\":\"1\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/{nameGroups[0]}/{nameGroups[1]}\",\"purviewDataType\":\"azure_sql_table\",\"purviewPrefix\":\"mssql\"},{\"name\":\"azureSQLNonDboNoDotsInNames\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",\"op2\":\"1\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/{nameGroups[0].parts[0]}/{nameGroups[0].parts[1]}\",\"purviewDataType\":\"azure_sql_table\",\"purviewPrefix\":\"mssql\"},{\"name\":\"azureSQL\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"sqlserver\"}],\"qualifiedName\":\"mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/dbo/{nameGroups[0]}\",\"purviewDataType\":\"azure_sql_table\",\"purviewPrefix\":\"mssql\"},{\"name\":\"azurePostgresNonPublic\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"postgresql\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",\"op2\":\"1\"},{\"op1\":\"nameSpcConParts\",\"compare\":\">\",\"op2\":\"4\"},{\"op1\":\"nameSpcConParts[3]\",\"compare\":\"=\",\"op2\":\"azure\"}],\"qualifiedName\":\"postgresql:
//{nameSpcBodyParts[0]}/{nameSpcBodyParts[2]}/{nameGroups[0].parts[0]}/{nameGroups[0].parts[1]}\",\"purviewDataType\":\"azure_postgresql_table\",\"purviewPrefix\":\"postgresql\"},{\"name\":\"azurePostgres\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"postgresql\"},{\"op1\":\"nameSpcConParts\",\"compare\":\">\",\"op2\":\"4\"},{\"op1\":\"nameSpcConParts[3]\",\"compare\":\"=\",\"op2\":\"azure\"}],\"qualifiedName\":\"postgresql://{nameSpcBodyParts[0]}/{nameSpcBodyParts[2]}/public/{nameGroups[0]}\",\"purviewDataType\":\"azure_postgresql_table\",\"purviewPrefix\":\"postgresql\"},{\"name\":\"postgresNonPublic\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"postgresql\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",\"op2\":\"1\"}],\"qualifiedName\":\"postgresql://servers/{nameSpcBodyParts[0]}:{nameSpcBodyParts[1]}/dbs/{nameSpcBodyParts[2]}/schemas/{nameGroups[0].parts[0]}/tables/{nameGroups[0].parts[1]}\",\"purviewDataType\":\"postgresql_table\",\"purviewPrefix\":\"postgresql\"},{\"name\":\"postgres\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"postgresql\"}],\"qualifiedName\":\"postgresql://servers/{nameSpcBodyParts[0]}:{nameSpcBodyParts[1]}/dbs/{nameSpcBodyParts[2]}/schemas/public/tables/{nameGroups[0]}\",\"purviewDataType\":\"postgresql_table\",\"purviewPrefix\":\"postgresql\"},{\"name\":\"hiveManagedTableNotDefault\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"dbfs\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"contains\",\"op2\":\"hive/warehouse\"},{\"op1\":\"nameGroups[0].parts\",\"compare\":\">\",\"op2\":\"4\"}],\"qualifiedName\":\"{nameGroups[0].parts[3]}.{nameGroups[0].parts[5]}@{AdbWorkspaceUrl}\",\"purviewDataType\":\"hive_table\",\"purviewPrefix\":\"hive\"},{\"name\":\"hiveManagedTableDefault\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"dbfs\"},{\"op1\":\"nameGroups[0]\",\"compare\":\"contains\",\"op2\":\"hive/warehouse\"}],\"qualif
iedName\":\"default.{nameGroups[0].parts[3]}@{AdbWorkspaceUrl}\",\"purviewDataType\":\"hive_table\",\"purviewPrefix\":\"hive\"},{\"name\":\"azureMySql\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"mysql\"}],\"qualifiedName\":\"mysql://{nameSpcBodyParts[0]}/{nameSpcBodyParts[2]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_mysql_table\",\"purviewPrefix\":\"mysql\"},{\"name\":\"azureCosmos\",\"parserConditions\":[{\"op1\":\"prefix\",\"compare\":\"=\",\"op2\":\"azurecosmos\"}],\"qualifiedName\":\"https://{nameSpcBodyParts[0]}/{nameSpcBodyParts[1]}/{nameSpcBodyParts[2]}/{nameGroups[0]}\",\"purviewDataType\":\"azure_cosmosdb_sqlapi_collection\",\"purviewPrefix\":\"https\"}]}" }, { "name": "PurviewAccountName", diff --git a/function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/OlMessageConsolodation.cs b/function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/OlMessageConsolodation.cs index 2a4687b..576a44f 100644 --- a/function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/OlMessageConsolodation.cs +++ b/function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/OlMessageConsolodation.cs @@ -143,12 +143,26 @@ private async Task ProcessStartEvent(Event olEvent, string jobRunId, Envir } try { - var entity = new TableEntity(TABLE_PARTITION, olEvent.Run.RunId) + if (olEvent.Inputs.Count > 0) + // Store inputs and env facet. { - { "EnvFacet", JsonConvert.SerializeObject(olEvent.Run.Facets.EnvironmentProperties) } - }; + var entity = new TableEntity(TABLE_PARTITION, olEvent.Run.RunId) + { + { "EnvFacet", JsonConvert.SerializeObject(olEvent.Run.Facets.EnvironmentProperties) }, + { "Inputs", JsonConvert.SerializeObject(olEvent.Inputs) } + + }; + await _tableClient.AddEntityAsync(entity); + } + else { + // Store only env facet. 
+ var entity = new TableEntity(TABLE_PARTITION, olEvent.Run.RunId) + { + { "EnvFacet", JsonConvert.SerializeObject(olEvent.Run.Facets.EnvironmentProperties) } - await _tableClient.AddEntityAsync(entity); + }; + await _tableClient.AddEntityAsync(entity); + } } catch (RequestFailedException ex) { @@ -159,6 +173,7 @@ private async Task ProcessStartEvent(Event olEvent, string jobRunId, Envir _log.LogError(ex, $"OlMessageConsolodation-ProcessStartEvent: Error {ex.Message} when processing entity"); return false; } + return true; } @@ -170,6 +185,7 @@ private async Task JoinEventData(Event olEvent, string jobRunId) } TableEntity te; + TableEntity te_inputs; // Processing time can sometimes cause complete events int retryCount = 4; @@ -195,6 +211,28 @@ private async Task JoinEventData(Event olEvent, string jobRunId) await Task.Delay(delay); } + // Get inputs. Todo: Check if more efficient to get inputs within the same while loop above. Can we get 2 entities at the same time? + currentRetry = 0; + while (true) + { + try + { + _log.LogInformation("Trying to get inputs"); + te_inputs = await _tableClient.GetEntityAsync(TABLE_PARTITION, olEvent.Run.RunId, new string[] { "Inputs" }); + break; + } + catch (RequestFailedException) + { + currentRetry++; + _log.LogWarning($"Start event was missing, retrying to consolidate message to get inputs. Retry count: {currentRetry}"); + if (currentRetry > retryCount) + { + return false; + } + } + await Task.Delay(delay); + } + // Add Environment to event var envFacet = JsonConvert.DeserializeObject(te["EnvFacet"].ToString() ?? 
""); if (envFacet is null) @@ -204,15 +242,28 @@ private async Task JoinEventData(Event olEvent, string jobRunId) } olEvent.Run.Facets.EnvironmentProperties = envFacet; - // clean up table over time - try - { - var delresp = await _tableClient.DeleteEntityAsync(TABLE_PARTITION, olEvent.Run.RunId); - } - catch (Exception ex) - { - _log.LogError(ex, $"OlMessageConsolodation-JoinEventData: Error {ex.Message} when deleting entity"); - } + // Add Inputs to event if not already there (will only be done for DataSourceV2 sources). The START row is stored without an "Inputs" column when the START event had no inputs, so the lookup is null-safe and falls through to the warning below. + if (olEvent.Inputs.Count == 0) { + var inputs = JsonConvert.DeserializeObject>(te_inputs["Inputs"]?.ToString() ?? ""); + + if (inputs is null) + { + _log.LogWarning($"OlMessageConsolodation-JoinEventData: Warning: no inputs found for datasource v2 COMPLETE event"); + return false; + } + olEvent.Inputs = inputs; + + } + + // clean up table over time. + try + { + var delresp = await _tableClient.DeleteEntityAsync(TABLE_PARTITION, olEvent.Run.RunId); + } + catch (Exception ex) + { + _log.LogError(ex, $"OlMessageConsolodation-JoinEventData: Error {ex.Message} when deleting entity"); + } return true; } @@ -228,11 +279,32 @@ private bool IsStartEventEnvironment(Event olEvent) return false; } + /// + /// Helper function to determine if the event is one of + /// the data source v2 ones which need to aggregate data + /// from the start and complete events + /// + private bool isDataSourceV2Event(Event olEvent) { + string[] special_cases = {"azurecosmos://", "iceberg://"}; // todo: make this configurable? 
+ // Don't need to process START events here as they have both inputs and outputs + if (olEvent.EventType == "START") return false; + + foreach (var outp in olEvent.Outputs) + { + foreach (var source in special_cases) + { + if (outp.NameSpace.StartsWith(source, System.StringComparison.Ordinal)) return true; + } + } + return false; + } + private bool IsJoinEvent(Event olEvent) { + // A COMPLETE event from a data source v2 writer may carry outputs only; isDataSourceV2Event decides whether it still needs joining. if (olEvent.EventType == COMPLETE_EVENT) { - if (olEvent.Inputs.Count > 0 && olEvent.Outputs.Count > 0) + if ((olEvent.Inputs.Count > 0 && olEvent.Outputs.Count > 0) || (olEvent.Outputs.Count > 0 && isDataSourceV2Event(olEvent))) { return true; } diff --git a/function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/ValidateOlEvent.cs b/function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/ValidateOlEvent.cs index 984eb3b..74486f0 100644 --- a/function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/ValidateOlEvent.cs +++ b/function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/ValidateOlEvent.cs @@ -29,10 +29,31 @@ public ValidateOlEvent(ILoggerFactory loggerFactory) _log = loggerFactory.CreateLogger(); } + /// + /// Helper function to determine if the event is one of + /// the data source v2 ones which need to aggregate data + /// from the start and complete events + /// + private bool isDataSourceV2Event(Event olEvent) { + string[] special_cases = {"azurecosmos://", "iceberg://"}; // todo: make this configurable? + // Don't need to process START events here as they have both inputs and outputs + if (olEvent.EventType == "START") return false; + + foreach (var outp in olEvent.Outputs) + { + foreach (var source in special_cases) + { + if (outp.NameSpace.StartsWith(source, System.StringComparison.Ordinal)) return true; + } + } + return false; + } + /// /// Performs initial validation of OpenLineage input /// The tested criteria include: /// 1. Events have both inputs and outputs + /// a. Except for special cases covered in isDataSourceV2Event /// 2. 
Events do not have the same input and output /// 3. EventType is START or COMPLETE /// 4. If EventType is START, there is a Environment Facet @@ -40,7 +61,7 @@ public ValidateOlEvent(ILoggerFactory loggerFactory) /// OpenLineage Event message /// true if input is valid, false if not public bool Validate(Event olEvent){ - if (olEvent.Inputs.Count > 0 && olEvent.Outputs.Count > 0) + if ((olEvent.Inputs.Count > 0 && olEvent.Outputs.Count > 0) || (olEvent.Outputs.Count > 0 && isDataSourceV2Event(olEvent))) { // Need to rework for multiple inputs and outputs in one packet - possibly combine and then hash if (InOutEqual(olEvent)) diff --git a/function-app/adb-to-purview/src/Function.Domain/Helpers/parser/DatabricksToPurviewParser.cs b/function-app/adb-to-purview/src/Function.Domain/Helpers/parser/DatabricksToPurviewParser.cs index 9ac9bfb..22ecf1e 100644 --- a/function-app/adb-to-purview/src/Function.Domain/Helpers/parser/DatabricksToPurviewParser.cs +++ b/function-app/adb-to-purview/src/Function.Domain/Helpers/parser/DatabricksToPurviewParser.cs @@ -280,7 +280,6 @@ public DatabricksProcess GetDatabricksProcess(string taskQn) { var databricksProcess = new DatabricksProcess(); //var ColumnAttributes = new ColumnLevelAttributes(); - var inputs = new List(); foreach (IInputsOutputs input in _eEvent.OlEvent!.Inputs) { @@ -292,10 +291,9 @@ public DatabricksProcess GetDatabricksProcess(string taskQn) { outputs.Add(GetInputOutputs(output)); } - databricksProcess.Attributes = GetProcAttributes(taskQn, inputs,outputs,_eEvent.OlEvent); //databricksProcess.Attributes.ColumnMapping = JsonConvert.SerializeObject(_colParser.GetColIdentifiers()); - databricksProcess.RelationshipAttributes.Task.QualifiedName = taskQn; + databricksProcess.RelationshipAttributes.Task.QualifiedName = taskQn; return databricksProcess; } diff --git a/function-app/adb-to-purview/src/Function.Domain/Helpers/parser/QnParser.cs b/function-app/adb-to-purview/src/Function.Domain/Helpers/parser/QnParser.cs index 
b02ca72..d7c805e 100644 --- a/function-app/adb-to-purview/src/Function.Domain/Helpers/parser/QnParser.cs +++ b/function-app/adb-to-purview/src/Function.Domain/Helpers/parser/QnParser.cs @@ -59,6 +59,7 @@ public PurviewIdentifier GetIdentifiers(string nameSpace, string name) // Break the name and nameSpace values into their individual / referencable parts var olParts = new OlParts(nameSpace, name); + // Get a dictionary assigning the configuration string keys to each of the olParts var olDynParts = olParts.GetDynamicPairs(JSON_KEY_NAMES); @@ -75,7 +76,6 @@ public PurviewIdentifier GetIdentifiers(string nameSpace, string name) // Use the relevant configuration mapping and the olParts to construct the PurviewIdentifier purviewIdentifier = GetPurviewIdentifier(mapping, olDynParts); - return purviewIdentifier; } diff --git a/function-app/adb-to-purview/src/Function.Domain/Services/OlConsolodateEnrich.cs b/function-app/adb-to-purview/src/Function.Domain/Services/OlConsolodateEnrich.cs index b0868c6..1c3db3a 100644 --- a/function-app/adb-to-purview/src/Function.Domain/Services/OlConsolodateEnrich.cs +++ b/function-app/adb-to-purview/src/Function.Domain/Services/OlConsolodateEnrich.cs @@ -69,8 +69,10 @@ public OlConsolodateEnrich( var validateOlEvent = new ValidateOlEvent(_loggerFactory); var olMessageConsolodation = new OlMessageConsolodation(_loggerFactory, _configuration); + var olEnrichMessage = new OlMessageEnrichment(_loggerFactory, _configuration); + // Validate the event if (!validateOlEvent.Validate(_event)) { diff --git a/function-app/adb-to-purview/src/Function.Domain/Services/OlToPurviewParsingService.cs b/function-app/adb-to-purview/src/Function.Domain/Services/OlToPurviewParsingService.cs index b8c6199..02c50f0 100644 --- a/function-app/adb-to-purview/src/Function.Domain/Services/OlToPurviewParsingService.cs +++ b/function-app/adb-to-purview/src/Function.Domain/Services/OlToPurviewParsingService.cs @@ -49,17 +49,20 @@ public 
OlToPurviewParsingService(ILoggerFactory loggerFactory, IConfiguration co _logger.LogWarning($"OlToPurviewParsingService-GetPurviewFromOlEventAsync: Event data is not valid - eventData: {JsonConvert.SerializeObject(eventData)}"); return null; } - IDatabricksToPurviewParser parser = new DatabricksToPurviewParser(_loggerFactory, _config, eventData); if (eventData.IsInteractiveNotebook) { return ParseInteractiveNotebook(parser); + + } + else if (parser.GetJobType() == JobType.JobNotebook) { return ParseJobNotebook(parser); } + else { return ParseJobTask(parser); @@ -71,7 +74,6 @@ private string ParseInteractiveNotebook(IDatabricksToPurviewParser parser) var databricksWorkspace = parser.GetDatabricksWorkspace(); var databricksNotebook = parser.GetDatabricksNotebook(databricksWorkspace.Attributes.QualifiedName, true); var databricksProcess = parser.GetDatabricksProcess(databricksNotebook.Attributes.QualifiedName); - var databricksWorkspaceStr = JsonConvert.SerializeObject(databricksWorkspace); var databricksNotebookStr = JsonConvert.SerializeObject(databricksNotebook); var databricksProcessStr = JsonConvert.SerializeObject(databricksProcess); diff --git a/function-app/adb-to-purview/src/Functions/PurviewOut.cs b/function-app/adb-to-purview/src/Functions/PurviewOut.cs index cc24cdd..61c6479 100644 --- a/function-app/adb-to-purview/src/Functions/PurviewOut.cs +++ b/function-app/adb-to-purview/src/Functions/PurviewOut.cs @@ -42,6 +42,7 @@ public async Task Run( _logger.LogInformation($"Start event, duplicate event, or no context found - eventData: {input}"); return ""; } + var purviewEvent = _olToPurviewParsingService.GetPurviewFromOlEvent(enrichedEvent); if (purviewEvent == null) {