cassandra 筆記
阿新 • • 發佈:2019-01-02
資料遷移
第一次使用cassandra,筆記一下,業務遷移,有兩列資料沒有遷移到新平臺,特此實驗,研究一下檢視db
-- List all keyspaces in the cluster.
-- (DESCRIBE KEYSPACE <name> shows one keyspace's DDL; the plural form lists them all,
-- which is what "檢視db" above is asking for.)
DESCRIBE KEYSPACES
查看錶
DESCRIBE TABLES
查看錶資訊
DESCRIBE tablename
建立測試表
-- Test table for reproducing the blob-column migration issue:
-- one int partition key plus three blob columns.
CREATE TABLE test.cl_test (
id int PRIMARY KEY,
cnt blob,
cl1 blob,
cl2 blob
)
WITH bloom_filter_fp_chance = 0.01
AND caching = {'keys' : 'ALL', 'rows_per_partition': 'NONE'}
AND comment = ''
AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor' }
AND crc_check_chance = 1.0
AND dclocal_read_repair_chance = 0.1
AND default_time_to_live = 0
-- 86400 s = 1 day tombstone GC grace. NOTE(review): shorter than Cassandra's
-- usual 10-day default — confirm this is intentional for this cluster.
AND gc_grace_seconds = 86400
AND max_index_interval = 2048
AND memtable_flush_period_in_ms = 0
AND min_index_interval = 128
AND read_repair_chance = 0.0
AND speculative_retry = '99PERCENTILE' ;
插入資料
-- Insert one test row; the three blob values are raw hex literals (0x...),
-- deliberately large to reproduce the oversized-field behavior seen later.
-- NOTE(review): table name is unqualified, so this assumes a prior `USE test;`.
insert into cl_test (id,cnt,cl1,cl2) values (145,0x192c18096261736963696e666f192805617070696404646869641504392805493030303120396336366431616539636462343438346232346337633437613339633331326500180570686f6e6519280b70686f6e655f6272616e640b70686f6e655f6d6f64656c15043928056170706c65086970686f6e6520360015061696f194e1f15800,0x192c18096261736963696e666f192805617070696404646869641504392805493030303120396336366431616539636462343438346232346337633437613339633331326500180570686f6e6519280b70686f6e655f6272616e640b70686f6e655f6d6f64656c15043928056170706c65086970686f6e6520360015061696f194e1f15800,0x192c18096261736963696e666f192805617070696404646869641504392805493030303120396336366431616539636462343438346232346337633437613339633331326500180570686f6e6519280b70686f6e655f6272616e640b70686f6e655f6d6f64656c15043928056170706c65086970686f6e6520360015061696f194e1f15800);
刪除資料某行
-- Delete the whole row with id=145 from the test table created above.
-- (The original referenced "dlk_test", but the table created and populated
-- in this example is cl_test — a leftover from the real environment's name.)
DELETE FROM cl_test WHERE id = 145;
刪除資料某行某列
-- Delete only column cl1 of the id=145 row (the row itself remains).
-- (Table name fixed from "dlk_test" to cl_test, the table this example created.)
DELETE cl1 FROM cl_test WHERE id = 145;
資料匯出
-- Export the listed columns to CSV with cqlsh's COPY TO.
COPY test.test_cl (id,cl1,cl2) TO '/data/dlk_export/test_cl.csv';
或者
# Same export, run non-interactively from the shell via cqlsh -e.
bin/cqlsh hostname -e "COPY test.test_cl (id,cl1,cl2) TO '/data/export/test_cl.csv';"
資料匯入
-- Import the CSV back with cqlsh's COPY FROM.
COPY test.test_cl (id,cl1,cl2) FROM '/data/dlk_export/test_cl.csv';
匯入錯誤
Starting copy of test.test_cl with columns [id, ep_dlk, ip_dlk].
<stdin>:1:Failed to import 5000 rows: Error - field larger than field limit (131072), given up after 1 attempts
<stdin>:1:Exceeded maximum number of insert errors 1000
<stdin>:1:Failed to process 5000 rows; failed rows written to import_test_test_cl.err
<stdin>:1:Exceeded maximum number of insert errors 1000
Processed: 0 rows; Rate: 0 rows/s; Avg. rate: 0 rows/s
0 rows imported from 0 files in 5.355 seconds (0 skipped).
處理
在 cqlsh 指令碼中增加
# Patch for cqlsh: raise the csv module's default 131072-byte field limit so
# COPY FROM can parse rows whose blob fields exceed it (the "field larger than
# field limit (131072)" error above).
# FIX: the original snippet called sys.maxsize without importing sys, which
# would raise NameError — `import sys` added.
import csv
import getpass
import sys

csv.field_size_limit(sys.maxsize)
繼續匯入錯誤
<stdin>:1:Failed to import 20 rows: InvalidRequest - Error from server: code=2200 [Invalid query] message="Batch too large", will retry later, attempt 3 of 5
<stdin>:1:Failed to import 20 rows: InvalidRequest - Error from server: code=2200 [Invalid query] message="Batch too large", given up after 5 attempts
<stdin>:1:Failed to import 20 rows: InvalidRequest - Error from server: code=2200 [Invalid query] message="Batch too large", will retry later, attempt 2 of 5
<stdin>:1:Failed to import 20 rows: InvalidRequest - Error from server: code=2200 [Invalid query] message="Batch too large", will retry later, attempt 4 of 5
<stdin>:1:Failed to import 20 rows: InvalidRequest - Error from server: code=2200 [Invalid query] message="Batch too large", will retry later, attempt 2 of 5
處理:
/etc/cassandra/cassandra.yaml 檔案中,引數 batch_size_fail_threshold_in_kb 的預設值只有50,一條DML語句就超過了這個閾值.
修改
batch_size_fail_threshold_in_kb: 500
修改匯入語句
-- Import one row per batch so each INSERT batch stays under
-- batch_size_fail_threshold_in_kb ("Batch too large" error above).
-- FIX: the original had a stray ';' before WITH, which terminates the COPY
-- statement early and leaves the WITH options as a syntax error.
COPY test.test_cl (id,cl1,cl2) FROM '/data/dlk_export/test_cl.csv' WITH MAXBATCHSIZE = 1 AND MINBATCHSIZE = 1;
ok