@@ -136,9 +136,8 @@ dataset:
136136 clusterStatus: false
137137 clusterKeyPosition: -1
138138 criticalDataElementStatus: false
139- tags: null
140- classification: null
141- encryptedColumnName: null
139+ tags: []
140+ classification: public
142141 transformSourceTables:
143142 - table_name_1
144143 - table_name_2
@@ -161,9 +160,9 @@ dataset:
161160 clusterStatus: true
162161 clusterKeyPosition: 1
163162 criticalDataElementStatus: false
164- tags: null
165- classification: null
166- encryptedColumnName: null
163+ tags: []
164+ classification: restricted
165+ encryptedColumnName: enc_rcvr_id
167166 - column: rcvr_cntry_code
168167 isPrimary: false # NEW in v2.1.0, Optional, default value is false, indicates whether the column is primary key in the table.
169168 primaryKeyPosition: -1
@@ -177,8 +176,8 @@ dataset:
177176 clusterStatus: false
178177 clusterKeyPosition: -1
179178 criticalDataElementStatus: false
180- tags: null
181- classification: null
179+ tags: []
180+ classification: public
182181 authoritativeDefinitions:
183182 - url: https://collibra.com/asset/742b358f-71a5-4ab1-bda4-dcdba9418c25
184183 type: businessDefinition
@@ -197,7 +196,7 @@ dataset:
197196| dataset.table | | Yes | Name of the table being cataloged; the value should only contain the table name. Do not include the project or dataset name in the value. |
198197| dataset.table.physicalName | | No | Physical name of the table, default value is table name + version separated by underscores, as `table_1_2_0`. |
199198| dataset.table.priorTableName | | No | Name of the previous version of the dataset, if applicable. |
200- | dataset.table.description | | No | List of links to sources that provide more detail on column logic or values; examples would be URL to a GitHub repo, Collibra, on another tool. |
199+ | dataset.table.description | | No | Description of the table. |
201200| dataset.table.authoritativeDefinitions | | No | List of links to sources that provide more details on the table; examples would be a link to an external definition, a training video, a GitHub repo, Collibra, or another tool. Authoritative definitions follow the same structure in the standard. |
202201| dataset.table.dataGranularity | | No | Granular level of the data in the table. Example would be `pmt_txn_id`. |
203202| dataset.table.columns | | Yes | Array. A list of columns in the table. |
@@ -375,14 +374,10 @@ stakeholders:
375374 - username: mhopper
376375 role: Data Scientist
377376 dateIn: 2022-10-01
378- dateOut: null
379- replacedByUsername: null
380377 - username: daustin
381378 role: Owner
382379 comment: Keeper of the grail
383380 dateIn: 2022-10-01
384- dateOut: null
385- replacedByUsername: null
386381```
387382
388383### Definitions
@@ -518,243 +513,4 @@ contractCreatedTs: 2022-11-15 02:59:43
518513
520515## Full example
520515
521- ```yaml
522- # What's this data contract about?
523- datasetDomain: seller # Domain
524- quantumName: my quantum # Data product name
525- userConsumptionMode: Analytical
526- version: 1.1.0 # Version (follows semantic versioning)
527- status: current
528- uuid: 53581432-6c55-4ba2-a65f-72344a91553a
529-
530- # Lots of information
531- description:
532- purpose: Views built on top of the seller tables.
533- limitations: null
534- usage: null
535- tenant: ClimateQuantumInc
536-
537- # Getting support
538- productDl: product-dl@ClimateQuantum.org
539- productSlackChannel: '#product-help'
540- productFeedbackUrl: null
541-
542- # Physical parts / GCP / BigQuery specific
543- sourcePlatform: googleCloudPlatform
544- sourceSystem: bigQuery
545- datasetProject: edw # BQ dataset
546- datasetName: access_views # BQ dataset
547-
548- kind: DataContract
549- apiVersion: 2.3.0 # Standard version (follows semantic versioning, previously known as templateVersion)
550-
551- type: tables
552-
553- # Physical access
554- driver: null
555- driverVersion: null
556- server: null
557- database: pypl-edw.pp_access_views
558- username: '${env.username}'
559- password: '${env.password}'
560- schedulerAppName: name_coming_from_scheduler # NEW 2.1.0 Required if you want to schedule stuff, comes from DataALM.
561-
562- # Dataset, schema and quality
563- dataset:
564- - table: tbl
565- physicalName: tbl_1 # NEW in v2.1.0, Optional, default value is table name + version separated by underscores, as table_1_2_0
566- priorTableName: null # if needed
567- description: Provides core payment metrics
568- authoritativeDefinitions: # NEW in v2.2.0, inspired by the column-level authoritative links
569- - url: https://catalog.data.gov/dataset/air-quality
570- type: businessDefinition
571- - url: https://youtu.be/jbY1BKFj9ec
572- type: videoTutorial
573- tags: null
574- dataGranularity: Aggregation on columns txn_ref_dt, pmt_txn_id
575- columns:
576- - column: txn_ref_dt
577- isPrimary: false # NEW in v2.1.0, Optional, default value is false, indicates whether the column is primary key in the table.
578- primaryKeyPosition: -1
579- businessName: transaction reference date
580- logicalType: date
581- physicalType: date
582- isNullable: false
583- description: null
584- partitionStatus: true
585- partitionKeyPosition: 1
586- clusterStatus: false
587- clusterKeyPosition: -1
588- criticalDataElementStatus: false
589- tags: null
590- classification: null
591- encryptedColumnName: null
592- transformSourceTables:
593- - table_name_1
594- - table_name_2
595- - table_name_3
596- transformLogic: sel t1.txn_dt as txn_ref_dt from table_name_1 as t1, table_name_2 as t2, table_name_3 as t3 where t1.txn_dt=date-3
597- transformDescription: defines the logic in business terms; logic for dummies
598- sampleValues:
599- - 2022-10-03
600- - 2020-01-28
601- - column: rcvr_id
602- isPrimary: true # NEW in v2.1.0, Optional, default value is false, indicates whether the column is primary key in the table.
603- primaryKeyPosition: 1
604- businessName: receiver id
605- logicalType: string
606- physicalType: varchar(18)
607- isNullable: false
608- description: A description for column rcvr_id.
609- partitionStatus: false
610- partitionKeyPosition: -1
611- clusterStatus: true
612- clusterKeyPosition: 1
613- criticalDataElementStatus: false
614- tags: null
615- classification: null
616- encryptedColumnName: null
617- - column: rcvr_cntry_code
618- isPrimary: false # NEW in v2.1.0, Optional, default value is false, indicates whether the column is primary key in the table.
619- primaryKeyPosition: -1
620- businessName: receiver country code
621- logicalType: string
622- physicalType: varchar(2)
623- isNullable: false
624- description: null
625- partitionStatus: false
626- partitionKeyPosition: -1
627- clusterStatus: false
628- clusterKeyPosition: -1
629- criticalDataElementStatus: false
630- tags: null
631- classification: null
632- authoritativeDefinitions:
633- - url: https://collibra.com/asset/742b358f-71a5-4ab1-bda4-dcdba9418c25
634- type: businessDefinition
635- - url: https://github.com/myorg/myrepo
636- type: transformationImplementation
637- - url: jdbc:postgresql://localhost:5432/adventureworks/tbl_1/rcvr_cntry_code
638- type: implementation
639- encryptedColumnName: rcvr_cntry_code_encrypted
640- quality:
641- - code: nullCheck
642- templateName: NullCheck
643- description: column should not contain null values
644- toolName: Elevate
645- toolRuleName: DQ.rw.tab1_2_0_0.rcvr_cntry_code.NullCheck
646- dimension: completeness # dropdown 7 values
647- type: dataQuality
648- severity: error
649- businessImpact: operational
650- scheduleCronExpression: 0 20 * * *
651- customProperties:
652- - property: FIELD_NAME
653- value:
654- - property: COMPARE_TO
655- value:
656- - property: COMPARISON_TYPE
657- value: Greater than
658- quality:
659- - code: countCheck # Required, name of the rule
660- templateName: CountCheck # NEW in v2.1.0 Required
661- description: Ensure row count is within expected volume range # Optional
662- toolName: Elevate # Required
663- toolRuleName: DQ.rw.tab1.CountCheck # NEW in v2.1.0 Optional (Available only to the users who can change in source code edition)
664- dimension: completeness # Optional
665- type: reconciliation # Optional NEW in v2.1.0 default value for column level check - dataQuality and for table level reconciliation
666- severity: error # Optional NEW in v2.1.0, default value is error
667- businessImpact: operational # Optional NEW in v2.1.0
668- scheduleCronExpression: 0 20 * * * # Optional NEW in v2.1.0 default schedule - every day 10 a.m. UTC
669-
670- # Pricing
671- price:
672- priceAmount: 9.95
673- priceCurrency: USD
674- priceUnit: megabyte
675-
676- # Stakeholders
677- stakeholders:
678- - username: ceastwood
679- role: Data Scientist
680- dateIn: 2022-08-02
681- dateOut: 2022-10-01
682- replacedByUsername: mhopper
683- - username: mhopper
684- role: Data Scientist
685- dateIn: 2022-10-01
686- dateOut: null
687- replacedByUsername: null
688- - username: daustin
689- role: Owner
690- comment: Keeper of the grail
691- dateIn: 2022-10-01
692- dateOut: null
693- replacedByUsername: null
694-
695- # Roles
696- roles:
697- - role: microstrategy_user_opr
698- access: read
699- firstLevelApprovers: Reporting Manager
700- secondLevelApprovers: 'mandolorian'
701- - role: bq_queryman_user_opr
702- access: read
703- firstLevelApprovers: Reporting Manager
704- secondLevelApprovers: na
705- - role: risk_data_access_opr
706- access: read
707- firstLevelApprovers: Reporting Manager
708- secondLevelApprovers: 'dathvador'
709- - role: bq_unica_user_opr
710- access: write
711- firstLevelApprovers: Reporting Manager
712- secondLevelApprovers: 'mickey'
713-
714- # SLA
715- slaDefaultColumn: tab1.txn_ref_dt # Optional, default value is partitionColumn.
716- slaProperties:
717- - property: latency # Property, see list of values in DP QoS
718- value: 4
719- unit: d # d, day, days for days; y, yr, years for years
720- column: tab1.txn_ref_dt # This would not be needed as it is the same table.column as the default one
721- - property: generalAvailability
722- value: 2022-05-12T09:30:10-08:00
723- - property: endOfSupport
724- value: 2032-05-12T09:30:10-08:00
725- - property: endOfLife
726- value: 2042-05-12T09:30:10-08:00
727- - property: retention
728- value: 3
729- unit: y
730- column: tab1.txn_ref_dt
731- - property: frequency
732- value: 1
733- valueExt: 1
734- unit: d
735- column: tab1.txn_ref_dt
736- - property: timeOfAvailability
737- value: 09:00-08:00
738- column: tab1.txn_ref_dt
739- driver: regulatory # Describes the importance of the SLA: [regulatory|analytics|operational|...]
740- - property: timeOfAvailability
741- value: 08:00-08:00
742- column: tab1.txn_ref_dt
743- driver: analytics
744-
745- # Tags
746- tags:
747- - transactions
748-
749- # Custom properties
750- customProperties:
751- - property: refRulesetName
752- value: gcsc.ruleset.name
753- - property: somePropertyName
754- value: property.value
755- - property: dataprocClusterName # Used for specific applications like Elevate
756- value: [cluster name]
757-
758- systemInstance: instance.ClimateQuantum.org
759- contractCreatedTs: 2022-11-15 02:59:43
760- ```
516+ [Check full example here.](../examples/all/full-example.yaml)
0 commit comments