Skip to content

Commit 776c38b

Browse files
committed
Update row-size-sampler.sh
update the script based on user feedback
1 parent ab7105f commit 776c38b

File tree

1 file changed

+24
-4
lines changed

1 file changed

+24
-4
lines changed

bin/row-size-sampler.sh

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,31 @@ SYSTEMKEYSPACEFILTER='system\|system_schema\|system_traces\|system_auth\|dse_aut
3535

3636
TABLEFILTER='^-\|^table_name\|(\|)'
3737

38-
keyspaces=$(echo desc keyspaces | kqlsh $@ | xargs -n1 echo | grep -v $SYSTEMKEYSPACEFILTER)
38+
#look at all keyspaces
39+
# List every keyspace (xargs -n1 puts each whitespace-separated name on its own
# line) and drop the ones matched by SYSTEMKEYSPACEFILTER. The filter is quoted
# so the pattern always reaches grep as a single, unexpanded argument.
keyspaces=$(echo desc keyspaces | kqlsh "$@" | xargs -n1 echo | grep -v -- "$SYSTEMKEYSPACEFILTER")
3940
for ks in $keyspaces; do
40-
tables=$(echo "SELECT table_name FROM system_schema.tables WHERE keyspace_name='$ks';" | kqlsh $@ | xargs -n1 echo | grep -v $TABLEFILTER)
41+
42+
#look at all tables in keyspace
43+
# Collect the table names of keyspace $ks, one word per line. TABLEFILTER removes
# the words that are not table names: those starting with '-', starting with
# "table_name", or containing a parenthesis. The filter is quoted and preceded
# by "--" so grep always receives it as one pattern argument.
tables=$(echo "SELECT table_name FROM system_schema.tables WHERE keyspace_name='$ks';" | kqlsh "$@" | xargs -n1 echo | grep -v -- "$TABLEFILTER")
4144
for tb in $tables; do
42-
kqlsh $@ -e "CONSISTENCY LOCAL_ONE; PAGING 100; SELECT * FROM \"$ks\".\"$tb\" LIMIT 30000;" | grep -v '\[json\]\|rows)\|-----\|^$' | tr -d ' ' | awk -v keyspace=$ks -v table=$tb -F'|' 'BEGIN {columns=0; numSamples=30000; kilobyte=1024; min = "NaN"; max = -1; lines = 1; } { if(NR==2){columns=NF;} if(NR>2){thislen=length($0)+107; total+=thislen; squares+=thislen^2; lines+=1; avg=total/lines; min = (thislen<min ? thislen : min); max = (thislen>max ? thislen : max) }} NR==numSamples {exit} END { printf("%s.%s = { lines: %d, columns: %d, average: %d bytes, stdev: %d bytes, min: %d bytes, max: %d bytes}\n", keyspace, table, lines, columns, avg, sqrt(squares/lines - (avg^2)), min, max); }'
43-
kqlsh $@ -e "DESCRIBE \"$ks\".\"$tb\";" | grep -i blob | while read line; do printf "\t...\"$ks\".\"$tb\" contains a BLOB type, if the majority of row size is from the BLOB, then divide the estimate in half" ; done
45+
#if a table has a blob, it's assumed that the size of the blob is a large factor in the row size.
46+
#if a blob is detected, the output totals should be divided by two for that table.
47+
#Divided by two since the output is printed in hex (2 characters for each byte).
48+
# Per-table flag: switched to "y" further down when the table description mentions "blob".
blob_factor="n"
49+
# Per-table flag: switched to "n" further down when the table description contains "default_time_to_live = 0".
ttl_factor="y"
50+
# Per-table flag: switched to "y" further down when the table description mentions "static".
static_factor="n"
51+
52+
# Capture the table definition. The identifiers are double-quoted, exactly as in
# the sampling SELECT, so that case-sensitive keyspace/table names resolve too.
describe=$(printf 'desc table "%s"."%s"\n' "$ks" "$tb" | kqlsh "$@")
53+
54+
# A definition that shows "default_time_to_live = 0" means no default TTL is set.
# The definition already captured in $describe is searched directly: this avoids
# another kqlsh round trip and the xargs pass, which aborts on an unbalanced
# quote in the definition text.
if grep -qi "default_time_to_live = 0" <<<"$describe"; then ttl_factor="n"; fi
55+
56+
# Flag the table when its definition mentions "blob" anywhere (case-insensitive;
# this also matches identifiers that merely contain the word). The definition
# already captured in $describe is searched directly instead of querying kqlsh
# again and piping through xargs, which aborts on an unbalanced quote.
if grep -qi blob <<<"$describe"; then blob_factor="y"; fi
57+
58+
# Flag the table when its definition mentions "static" anywhere (case-insensitive;
# this also matches identifiers that merely contain the word). The definition
# already captured in $describe is searched directly instead of querying kqlsh
# again and piping through xargs, which aborts on an unbalanced quote.
if grep -qi static <<<"$describe"; then static_factor="y"; fi
59+
60+
#Calculate averages using awk
61+
# Sample up to 30000 rows of "$ks"."$tb" and summarise the output line lengths.
# grep drops blank lines, separator lines, the row-count footer and [json] lines;
# tr strips spaces so padding is not counted. Each sampled line is charged its
# character count plus 100 + 6 + 2 per column.
# NOTE(review): the NR thresholds (3 for the column count, >2 for sampling)
# depend on the preamble kqlsh prints before the header row - confirm against
# real output. "lines" starts at 1, so the reported count and the divisor of the
# average are one higher than the number of sampled lines.
# The undefined shell variable frozen_factor is no longer passed to awk; the awk
# program never read it.
kqlsh "$@" -e "CONSISTENCY LOCAL_ONE; PAGING 100; SELECT * FROM \"$ks\".\"$tb\" LIMIT 30000;" \
  | grep -v '\[json\]\|rows)\|-----\|^$' \
  | tr -d ' ' \
  | awk -v keyspace="$ks" -v table="$tb" -v blob_factor="$blob_factor" \
      -v ttl_factor="$ttl_factor" -v static_factor="$static_factor" -F'|' '
    BEGIN { columns = 0; numSamples = 30000; min = "NaN"; max = -1; lines = 1 }
    {
      if (NR == 3) { columns = NF }
      if (NR > 2) {
        thislen = (length($0)) + 100 + 6 + (columns * 2)
        total += thislen
        squares += thislen ^ 2
        lines += 1
        avg = total / lines
        min = (thislen < min ? thislen : min)
        max = (thislen > max ? thislen : max)
      }
    }
    NR == numSamples { exit }
    END {
      printf("%s.%s = { lines: %d, columns: %d, average: %d bytes, stdev: %d bytes, min: %d bytes, max: %d bytes, blob: %s, default-ttl: %s, static: %s}\n", keyspace, table, lines, columns, avg, sqrt(squares/lines - (avg^2)), min, max, blob_factor, ttl_factor, static_factor)
    }'
4462
done
4563
done
64+
65+
# Completion marker printed after the keyspace loop has ended.
printf '%s\n' 'fin!'

0 commit comments

Comments
 (0)