|
| 1 | +'''The following script will export schema from existing cassandra cluster and checks if the current cassandra cluster is using any api's that Amazon Keyspaces doesn't support yet |
| 2 | +
|
| 3 | +## Prerequisites: download required libraries like python,boto3 and cassandra-driver |
| 4 | +
|
| 5 | +usage instructions: |
| 6 | +1. Download this python script from github and move it to machine/vm/container that can access your cassandra cluster |
| 7 | +2. If you want to connect to cassandra cluster and perform a compatibility assesment, then run below command |
| 8 | +python toolkit-compat-tool.py --host "hostname or IP" -u "username" -p "password" --port "native transport port" |
| 9 | +3. If you want to simply perform compatibility assesment against schema file, you can skip step 2 and run these commands |
| 10 | +cqlsh '<hostname>' <port> -u '<username>' -p '<password>' -e "describe schema" > schema.cql |
| 11 | +python toolkit-compat-tool.py --file 'schema.cql' |
| 12 | +4. There are several workarounds available for unsupported features, you can reachout to AWS support for further assistance |
| 13 | +
|
| 14 | +## |
| 15 | + Execution Sample commands: python toolkit-compat-tool --host "hostname" -u "username" -p "pwd" --ssl |
| 16 | + python toolkit-compat-tool --host "hostname" -u "username" -p "pwd" --port 9042 |
| 17 | + python toolkit-compat-tool --file 'schema.cql' |
| 18 | +
|
| 19 | +''' |
| 20 | +import boto3, time, sys |
| 21 | +import getpass |
| 22 | +import optparse |
| 23 | +import os, re |
| 24 | +import sys |
| 25 | +from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT |
| 26 | +from cassandra.query import dict_factory |
| 27 | +from cassandra.metadata import KeyspaceMetadata, TableMetadata, Metadata |
| 28 | +from ssl import SSLContext, PROTOCOL_TLSv1_2, CERT_REQUIRED |
| 29 | +#from cassandra_sigv4.auth import * |
| 30 | +from cassandra.auth import PlainTextAuthProvider |
| 31 | +import logging |
| 32 | + |
| 33 | +DEFAULT_HOST = '127.0.0.1' |
| 34 | +DEFAULT_PORT = 9042 |
| 35 | +DEFAULT_SSL = False |
| 36 | +DEFAULT_SIGV4 = False |
| 37 | +DEFAULT_CONNECT_TIMEOUT_SECONDS = 5 |
| 38 | +DEFAULT_REQUEST_TIMEOUT_SECONDS = 10 |
| 39 | +DEFAULT_AUTH_PROVIDER = 'PlainTextAuthProvider' |
| 40 | +auth = False |
| 41 | + |
| 42 | +parser = optparse.OptionParser(description='Compatibility tool') |
| 43 | +parser.add_option('--host', help='Amazon Keyspaces endpoint') |
| 44 | +parser.add_option("--sigv4", action='store_true', help='Use SigV4AuthProvider plugin for autentication and authorization', default=DEFAULT_SIGV4) |
| 45 | +parser.add_option("--auth-provider", help="The AuthProvider to use when connecting. Default is PlainTextAuthProvider", dest='auth_provider_name', default=DEFAULT_AUTH_PROVIDER) |
| 46 | +parser.add_option("-u", "--username", help="Authenticate as user.") |
| 47 | +parser.add_option("-p", "--password", help="Authenticate using password.") |
| 48 | +parser.add_option('--ssl', action='store_true', help='Use SSL', default=False) |
| 49 | +parser.add_option("--port", help='Port to connect to cassandra', type=int) |
| 50 | +parser.add_option("--file", help='Schema file to run compatability script against') |
| 51 | + |
| 52 | + |
| 53 | +# The function checks if the system_schema is part of the schema exported and tracks the frozen type in system_schema |
| 54 | + |
| 55 | +def frozen_counter(cluster_schema): |
| 56 | + frozen_in_system_schema = 0 |
| 57 | + system_schema_keyspace_tables = ['system_schema.tables', 'system_schema.functions', 'system_schema.triggers', 'system_schema.aggregates', 'system_schema.views', 'system_schema.indexes', 'system_schema.keyspaces', 'system_schema.types','system.paxos_repair_history','system.top_partitions','system_auth.network_permissions','system_views.clients','system_views.repair_jobs','system_views.repair_participates','system_views.repair_sessions','system_views.repair_validations','system_views.repairs','system_views.streaming'] |
| 58 | + for item in system_schema_keyspace_tables: |
| 59 | + occurances = re.findall( item, cluster_schema, re.IGNORECASE) |
| 60 | + if len(occurances) > 0: |
| 61 | + if item == 'system_schema.tables': |
| 62 | + frozen_in_system_schema = frozen_in_system_schema + 5 |
| 63 | + elif item == 'system_schema.functions': |
| 64 | + frozen_in_system_schema = frozen_in_system_schema + 2 |
| 65 | + elif item == 'system_schema.triggers': |
| 66 | + frozen_in_system_schema = frozen_in_system_schema + 1 |
| 67 | + elif item == 'system_schema.aggregates': |
| 68 | + frozen_in_system_schema = frozen_in_system_schema + 1 |
| 69 | + elif item == 'system_schema.views': |
| 70 | + frozen_in_system_schema = frozen_in_system_schema + 4 |
| 71 | + elif item == 'system_schema.indexes': |
| 72 | + frozen_in_system_schema = frozen_in_system_schema + 1 |
| 73 | + elif item == 'system_schema.keyspaces': |
| 74 | + frozen_in_system_schema = frozen_in_system_schema + 1 |
| 75 | + elif item == 'system_schema.types': |
| 76 | + frozen_in_system_schema = frozen_in_system_schema + 2 |
| 77 | + elif item == 'system.paxos_repair_history': |
| 78 | + frozen_in_system_schema = frozen_in_system_schema + 2 |
| 79 | + elif item == 'system.top_partitions': |
| 80 | + frozen_in_system_schema = frozen_in_system_schema + 2 |
| 81 | + elif item == 'system_auth.network_permissions': |
| 82 | + frozen_in_system_schema = frozen_in_system_schema + 1 |
| 83 | + elif item == 'system_views.clients': |
| 84 | + frozen_in_system_schema = frozen_in_system_schema + 1 |
| 85 | + elif item == 'system_views.repair_jobs': |
| 86 | + frozen_in_system_schema = frozen_in_system_schema + 2 |
| 87 | + elif item == 'system_views.repair_participates': |
| 88 | + frozen_in_system_schema = frozen_in_system_schema + 3 |
| 89 | + elif item == 'system_views.repair_sessions': |
| 90 | + frozen_in_system_schema = frozen_in_system_schema + 4 |
| 91 | + elif item == 'system_views.repair_validations': |
| 92 | + frozen_in_system_schema = frozen_in_system_schema + 1 |
| 93 | + elif item == 'system_views.repairs': |
| 94 | + frozen_in_system_schema = frozen_in_system_schema + 11 |
| 95 | + elif item == 'system_views.streaming': |
| 96 | + frozen_in_system_schema = frozen_in_system_schema + 1 |
| 97 | + return frozen_in_system_schema |
| 98 | + |
| 99 | +# The function checks the schema and provides the compatability |
| 100 | +def schema_check(cluster_schema): |
| 101 | + |
| 102 | + api_features = ['CREATE INDEX', 'CREATE TYPE', 'CREATE TRIGGER', 'CREATE FUNCTION', 'CREATE AGGREGATE', 'CREATE MATERIALIZED VIEW', 'frozen<', 'cdc = true'] |
| 103 | + unsupported_features_List = {} |
| 104 | + unsupported_features_message = {} |
| 105 | + |
| 106 | + unsupported_features_message['CREATE INDEX'] = 'Secondary Index' |
| 107 | + unsupported_features_message['CREATE TYPE'] = 'User Defined Type' |
| 108 | + unsupported_features_message['CREATE TRIGGER'] = 'Trigger' |
| 109 | + unsupported_features_message['CREATE FUNCTION'] = 'User Defined function' |
| 110 | + unsupported_features_message['CREATE AGGREGATE'] = 'Aggregators' |
| 111 | + unsupported_features_message['CREATE MATERIALIZED VIEW'] = 'Materialized View' |
| 112 | + unsupported_features_message['frozen<'] = 'Frozen Data type' |
| 113 | + unsupported_features_message['cdc = true'] = 'Change Data Capture' |
| 114 | + |
| 115 | + for item in api_features: |
| 116 | + occurances = re.findall( item, cluster_schema, re.IGNORECASE) |
| 117 | + if len(occurances) != 0: |
| 118 | + unsupported_features_List[item] = len(occurances) |
| 119 | + |
| 120 | + if len(unsupported_features_List) == 0: |
| 121 | + print("No unsupported Cassandra operators found") |
| 122 | + |
| 123 | + elif len(unsupported_features_List) > 0: |
| 124 | + print("") |
| 125 | + print('The following {} unsupported features were found. Some of these features have workarounds. Please contact Amazon Keyspaces team - mbh@amazon.com, kanvesky@amazon.com and nkantam@amazon.com to know about Workaround information'.format(len(unsupported_features_List))) |
| 126 | + for item in unsupported_features_List.keys(): |
| 127 | + print("") |
| 128 | + if item == 'frozen<': |
| 129 | + frozen_system = frozen_counter(cluster_schema) |
| 130 | + frozen_non_system_keyspaces = unsupported_features_List[item] - frozen_system |
| 131 | + if frozen_non_system_keyspaces > 0: |
| 132 | + print(" {} | found {} time(s)".format(unsupported_features_message[item], frozen_non_system_keyspaces)) |
| 133 | + else: |
| 134 | + print(" {} | found {} time(s)".format(unsupported_features_message[item], unsupported_features_List[item])) |
| 135 | + |
| 136 | +optvalues = optparse.Values() |
| 137 | +(options, arguments) = parser.parse_args(sys.argv[1:], values=optvalues) |
| 138 | + |
| 139 | +if hasattr(options, 'file'): |
| 140 | + schema_path = str(options.file) |
| 141 | + file_check = os.path.isfile(schema_path) |
| 142 | + if file_check: |
| 143 | + # Read the schema if file exists |
| 144 | + with open(schema_path, 'r') as file: |
| 145 | + schema = file.read() |
| 146 | + # calls the check schema function for compatability |
| 147 | + schema_check(schema) |
| 148 | + print("") |
| 149 | + sys.exit("Script Execution Completed") |
| 150 | + # Exists the script if the file path provided is invalid |
| 151 | + else: |
| 152 | + print("") |
| 153 | + sys.exit("NO FILE EXISTS,re-validate the path and re-run the script again") |
| 154 | + |
| 155 | + |
| 156 | +if hasattr(options, 'host'): |
| 157 | + endpoint = str(options.host).replace("'", '') |
| 158 | +else: |
| 159 | + endpoint = DEFAULT_HOST |
| 160 | + |
| 161 | +if hasattr(options, 'username'): |
| 162 | + username = str(options.username) |
| 163 | +else: |
| 164 | + username = None |
| 165 | + |
| 166 | +if hasattr(options, 'password'): |
| 167 | + password = str(options.password) |
| 168 | +else: |
| 169 | + password = None |
| 170 | + |
| 171 | +if hasattr(options, 'port'): |
| 172 | + cql_port = options.port |
| 173 | +else: |
| 174 | + cql_port = DEFAULT_PORT |
| 175 | + |
| 176 | +if hasattr(options, 'ssl'): |
| 177 | + ssl = True |
| 178 | +else: |
| 179 | + ssl = DEFAULT_SSL |
| 180 | + |
| 181 | +if ( hasattr(options, 'sigv4') | hasattr(options, 'auth_provider_name') ): |
| 182 | + auth = True |
| 183 | + |
| 184 | +user = getpass.getuser() |
| 185 | +user_dir = os.path.expanduser('~'+user) |
| 186 | +config_dir = os.path.join( user_dir, '.cassandra') |
| 187 | +cert_dir = os.path.join(config_dir, 'sf-class2-root.crt') |
| 188 | +compat_log = os.path.join(config_dir, 'compat') |
| 189 | + |
| 190 | +#now we will Create and configure logger |
| 191 | +logging.basicConfig(filename=compat_log, |
| 192 | + format='%(asctime)s %(message)s', |
| 193 | + filemode='a') |
| 194 | +#Let us Create an object |
| 195 | +logger=logging.getLogger() |
| 196 | + |
| 197 | +#Now we are going to Set the threshold of logger to INFO |
| 198 | +logger.setLevel(logging.INFO) |
| 199 | + |
| 200 | +ssl_context = SSLContext(PROTOCOL_TLSv1_2) |
| 201 | +ssl_context.load_verify_locations(cert_dir) # change the location to access the cert file |
| 202 | +ssl_context.verify_mode = CERT_REQUIRED |
| 203 | + |
| 204 | +''' |
| 205 | +The following connection example uses the SigV4Athentication plugin for the datastax python driver. |
| 206 | +This allows you to use user roles and service roles to delegate authentication and authorization to |
| 207 | +Documentation for the sigv4connection can be found here: |
| 208 | + https://docs.aws.amazon.com/keyspaces/latest/devguide/using_python_driver.html |
| 209 | + Github example can be found here: https://github.com/aws/aws-sigv4-auth-cassandra-python-driver-plugin |
| 210 | +''' |
| 211 | +## Credntial Configuration https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html |
| 212 | +## https://docs.aws.amazon.com/keyspaces/latest/devguide/programmatic.endpoints.html |
| 213 | + |
| 214 | +if hasattr(options, 'sigv4'): |
| 215 | + my_session = boto3.session.Session() |
| 216 | + auth_provider = SigV4AuthProvider(my_session) |
| 217 | +elif hasattr(options, 'auth_provider_name'): |
| 218 | + if options.auth_provider_name == 'SigV4AuthProvider': |
| 219 | + my_session = boto3.session.Session() |
| 220 | + auth_provider = SigV4AuthProvider(my_session) |
| 221 | + elif options.auth_provider_name == DEFAULT_AUTH_PROVIDER: |
| 222 | + if username: |
| 223 | + if not hasattr(options, 'password'): |
| 224 | + password = getpass.getpass() |
| 225 | + auth_provider = PlainTextAuthProvider(username=username, password=password) |
| 226 | + else: |
| 227 | + auth_provider = PlainTextAuthProvider(username=username, password=password) |
| 228 | + else: |
| 229 | + raise SyntaxError('Invalid parameter for auth-provider. "%s" is not a valid AuthProvider' % (auth_provider,)) |
| 230 | +else: |
| 231 | + if username: |
| 232 | + auth = True |
| 233 | + if not hasattr(options, 'password'): |
| 234 | + password = getpass.getpass() |
| 235 | + auth_provider = PlainTextAuthProvider(username=username, password=password) |
| 236 | + else: |
| 237 | + auth_provider = PlainTextAuthProvider(username=username, password=password) |
| 238 | + |
| 239 | +profile = ExecutionProfile(request_timeout= None, row_factory=dict_factory) |
| 240 | + |
| 241 | +cluster = Cluster( [endpoint], ssl_context=ssl_context if ssl else None, auth_provider=auth_provider if auth else None, port=cql_port, |
| 242 | + execution_profiles={EXEC_PROFILE_DEFAULT: profile}) |
| 243 | +session = cluster.connect() |
| 244 | +cluster_schema = cluster.metadata.export_schema_as_string() |
| 245 | +schema_check(cluster_schema) |
| 246 | +print("") |
| 247 | +sys.exit("Script Execution Completed") |
| 248 | + |
0 commit comments