new: [db] kvrocks support added instead of redis back-end

master
Alexandre Dulaunoy 2022-07-15 09:49:41 +02:00
parent 18e42e827e
commit ecd056272c
No known key found for this signature in database
GPG Key ID: 09E2CD4944E6CBCD
3 changed files with 632 additions and 1 deletions

602
etc/kvrocks.conf Normal file
View File

@ -0,0 +1,602 @@
################################ GENERAL #####################################
# By default kvrocks listens for connections from all the network interfaces
# available on the server. It is possible to listen to just one or multiple
# interfaces using the "bind" configuration directive, followed by one or
# more IP addresses.
#
# Examples:
#
# bind 192.168.1.100 10.0.0.1
# bind 127.0.0.1
bind 127.0.0.1
# Accept connections on the specified port, default is 6666.
port 6400
# Close the connection after a client is idle for N seconds (0 to disable)
timeout 0
# The number of worker's threads, increase or decrease it would effect the performance.
workers 8
# By default kvrocks does not run as a daemon. Use 'yes' if you need it.
# Note that kvrocks will write a pid file in /var/run/kvrocks.pid when daemonized.
daemonize no
# Kvrocks implements cluster solution that is similar with redis cluster solution.
# You can get cluster information by CLUSTER NODES|SLOTS|INFO command, it also is
# adapted to redis-cli, redis-benchmark, redis cluster SDK and redis cluster proxy.
# But kvrocks doesn't support to communicate with each others, so you must set
# cluster topology by CLUSTER SETNODES|SETNODEID commands, more details: #219.
#
# PLEASE NOTE:
# If you enable cluster, kvrocks will encode key with its slot id calculated by
# CRC16 and modulo 16384, endoding key with its slot id makes it efficient to
# migrate keys based on slot. So if you enabled at first time, cluster mode must
# not be disabled after restarting, and vice versa. That is to say, data is not
# compatible between standalone mode with cluster mode, you must migrate data
# if you want to change mode, otherwise, kvrocks will make data corrupt.
#
# Default: no
cluster-enabled no
# Set the max number of connected clients at the same time. By default
# this limit is set to 10000 clients, however if the server is not
# able to configure the process file limit to allow for the specified limit
# the max number of allowed clients is set to the current file limit
#
# Once the limit is reached the server will close all the new connections sending
# an error 'max number of clients reached'.
#
maxclients 10000
# Require clients to issue AUTH <PASSWORD> before processing any other
# commands. This might be useful in environments in which you do not trust
# others with access to the host running kvrocks.
#
# This should stay commented out for backward compatibility and because most
# people do not need auth (e.g. they run their own servers).
#
# Warning: since kvrocks is pretty fast an outside user can try up to
# 150k passwords per second against a good box. This means that you should
# use a very strong password otherwise it will be very easy to break.
#
# requirepass foobared
# If the master is password protected (using the "masterauth" configuration
# directive below) it is possible to tell the slave to authenticate before
# starting the replication synchronization process, otherwise the master will
# refuse the slave request.
#
# masterauth foobared
# Master-Salve replication would check db name is matched. if not, the slave should
# refuse to sync the db from master. Don't use default value, set the db-name to identify
# the cluster.
db-name d4-pdns.db
# The working directory
#
# The DB will be written inside this directory
# Note that you must specify a directory here, not a file name.
#dir /tmp/kvrocks
# The logs of server will be stored in this directory. If you don't specify
# one directory, by default, we store logs in the working directory that set
# by 'dir' above.
# log-dir /tmp/kvrocks
# When running daemonized, kvrocks writes a pid file in ${CONFIG_DIR}/kvrocks.pid by
# default. You can specify a custom pid file location here.
# pidfile /var/run/kvrocks.pid
pidfile ""
# You can configure a slave instance to accept writes or not. Writing against
# a slave instance may be useful to store some ephemeral data (because data
# written on a slave will be easily deleted after resync with the master) but
# may also cause problems if clients are writing to it because of a
# misconfiguration.
slave-read-only yes
# The slave priority is an integer number published by Kvrocks in the INFO output.
# It is used by Redis Sentinel in order to select a slave to promote into a
# master if the master is no longer working correctly.
#
# A slave with a low priority number is considered better for promotion, so
# for instance if there are three slave with priority 10, 100, 25 Sentinel will
# pick the one with priority 10, that is the lowest.
#
# However a special priority of 0 marks the replica as not able to perform the
# role of master, so a slave with priority of 0 will never be selected by
# Redis Sentinel for promotion.
#
# By default the priority is 100.
slave-priority 100
# TCP listen() backlog.
#
# In high requests-per-second environments you need an high backlog in order
# to avoid slow clients connections issues. Note that the Linux kernel
# will silently truncate it to the value of /proc/sys/net/core/somaxconn so
# make sure to raise both the value of somaxconn and tcp_max_syn_backlog
# in order to Get the desired effect.
tcp-backlog 511
# If the master is an old version, it may have specified replication threads
# that use 'port + 1' as listening port, but in new versions, we don't use
# extra port to implement replication. In order to allow the new replicas to
# copy old masters, you should indicate that the master uses replication port
# or not.
# If yes, that indicates master uses replication port and replicas will connect
# to 'master's listening port + 1' when synchronization.
# If no, that indicates master doesn't use replication port and replicas will
# connect 'master's listening port' when synchronization.
master-use-repl-port no
# Master-Slave replication. Use slaveof to make a kvrocks instance a copy of
# another kvrocks server. A few things to understand ASAP about kvrocks replication.
#
# 1) Kvrocks replication is asynchronous, but you can configure a master to
# stop accepting writes if it appears to be not connected with at least
# a given number of slaves.
# 2) Kvrocks slaves are able to perform a partial resynchronization with the
# master if the replication link is lost for a relatively small amount of
# time. You may want to configure the replication backlog size (see the next
# sections of this file) with a sensible value depending on your needs.
# 3) Replication is automatic and does not need user intervention. After a
# network partition slaves automatically try to reconnect to masters
# and resynchronize with them.
#
# slaveof <masterip> <masterport>
# slaveof 127.0.0.1 6379
# When a slave loses its connection with the master, or when the replication
# is still in progress, the slave can act in two different ways:
#
# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will
# still reply to client requests, possibly with out of date data, or the
# data set may just be empty if this is the first synchronization.
#
# 2) if slave-serve-stale-data is set to 'no' the slave will reply with
# an error "SYNC with master in progress" to all the kind of commands
# but to INFO and SLAVEOF.
#
slave-serve-stale-data yes
# To guarantee slave's data safe and serve when it is in full synchronization
# state, slave still keep itself data. But this way needs to occupy much disk
# space, so we provide a way to reduce disk occupation, slave will delete itself
# entire database before fetching files from master during full synchronization.
# If you want to enable this way, you can set 'slave-delete-db-before-fullsync'
# to yes, but you must know that database will be lost if master is down during
# full synchronization, unless you have a backup of database.
#
# This option is similar redis replicas RDB diskless load option:
# repl-diskless-load on-empty-db
#
# Default: no
slave-empty-db-before-fullsync no
# If replicas need full synchronization with master, master need to create
# checkpoint for feeding replicas, and replicas also stage a checkpoint of
# the master. If we also keep the backup, it maybe occupy extra disk space.
# You can enable 'purge-backup-on-fullsync' if disk is not sufficient, but
# that may cause remote backup copy failing.
#
# Default: no
purge-backup-on-fullsync no
# The maximum allowed rate (in MB/s) that should be used by Replication.
# If the rate exceeds max-replication-mb, replication will slow down.
# Default: 0 (i.e. no limit)
max-replication-mb 0
# The maximum allowed aggregated write rate of flush and compaction (in MB/s).
# If the rate exceeds max-io-mb, io will slow down.
# 0 is no limit
# Default: 500
max-io-mb 500
# The maximum allowed space (in GB) that should be used by RocksDB.
# If the total size of the SST files exceeds max_allowed_space, writes to RocksDB will fail.
# Please see: https://github.com/facebook/rocksdb/wiki/Managing-Disk-Space-Utilization
# Default: 0 (i.e. no limit)
max-db-size 0
# The maximum backup to keep, server cron would run every minutes to check the num of current
# backup, and purge the old backup if exceed the max backup num to keep. If max-backup-to-keep
# is 0, no backup would be keep. But now, we only support 0 or 1.
max-backup-to-keep 1
# The maximum hours to keep the backup. If max-backup-keep-hours is 0, wouldn't purge any backup.
# default: 1 day
max-backup-keep-hours 24
# max-bitmap-to-string-mb use to limit the max size of bitmap to string transformation(MB).
#
# Default: 16
max-bitmap-to-string-mb 16
################################## SLOW LOG ###################################
# The Kvrocks Slow Log is a mechanism to log queries that exceeded a specified
# execution time. The execution time does not include the I/O operations
# like talking with the client, sending the reply and so forth,
# but just the time needed to actually execute the command (this is the only
# stage of command execution where the thread is blocked and can not serve
# other requests in the meantime).
#
# You can configure the slow log with two parameters: one tells Kvrocks
# what is the execution time, in microseconds, to exceed in order for the
# command to get logged, and the other parameter is the length of the
# slow log. When a new command is logged the oldest one is removed from the
# queue of logged commands.
# The following time is expressed in microseconds, so 1000000 is equivalent
# to one second. Note that -1 value disables the slow log, while
# a value of zero forces the logging of every command.
slowlog-log-slower-than 100000
# There is no limit to this length. Just be aware that it will consume memory.
# You can reclaim memory used by the slow log with SLOWLOG RESET.
slowlog-max-len 128
# If you run kvrocks from upstart or systemd, kvrocks can interact with your
# supervision tree. Options:
# supervised no - no supervision interaction
# supervised upstart - signal upstart by putting kvrocks into SIGSTOP mode
# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET
# supervised auto - detect upstart or systemd method based on
# UPSTART_JOB or NOTIFY_SOCKET environment variables
# Note: these supervision methods only signal "process is ready."
# They do not enable continuous liveness pings back to your supervisor.
supervised no
################################## PERF LOG ###################################
# The Kvrocks Perf Log is a mechanism to log queries' performance context that
# exceeded a specified execution time. This mechanism uses rocksdb's
# Perf Context and IO Stats Context, Please see:
# https://github.com/facebook/rocksdb/wiki/Perf-Context-and-IO-Stats-Context
#
# This mechanism is enabled when profiling-sample-commands is not empty and
# profiling-sample-ratio greater than 0.
# It is important to note that this mechanism affects performance, but it is
# useful for troubleshooting performance bottlenecks, so it should only be
# enabled when performance problems occur.
# The name of the commands you want to record. Must be original name of
# commands supported by Kvrocks. Use ',' to separate multiple commands and
# use '*' to record all commands supported by Kvrocks.
# Example:
# - Single command: profiling-sample-commands get
# - Multiple commands: profiling-sample-commands get,mget,hget
#
# Default: empty
# profiling-sample-commands ""
# Ratio of the samples would be recorded. We simply use the rand to determine
# whether to record the sample or not.
#
# Default: 0
profiling-sample-ratio 0
# There is no limit to this length. Just be aware that it will consume memory.
# You can reclaim memory used by the perf log with PERFLOG RESET.
#
# Default: 256
profiling-sample-record-max-len 256
# profiling-sample-record-threshold-ms use to tell the kvrocks when to record.
#
# Default: 100 millisecond
profiling-sample-record-threshold-ms 100
################################## CRON ###################################
# Compact Scheduler, auto compact at schedule time
# time expression format is the same as crontab(currently only support * and int)
# e.g. compact-cron 0 3 * * * 0 4 * * *
# would compact the db at 3am and 4am everyday
# compact-cron 0 3 * * *
# The hour range that compaction checker would be active
# e.g. compaction-checker-range 0-7 means compaction checker would be worker between
# 0-7am every day.
compaction-checker-range 0-7
# Bgsave scheduler, auto bgsave at schedule time
# time expression format is the same as crontab(currently only support * and int)
# e.g. bgsave-cron 0 3 * * * 0 4 * * *
# would bgsave the db at 3am and 4am everyday
# Command renaming.
#
# It is possible to change the name of dangerous commands in a shared
# environment. For instance the KEYS command may be renamed into something
# hard to guess so that it will still be available for internal-use tools
# but not available for general clients.
#
# Example:
#
# rename-command KEYS b840fc02d524045429941cc15f59e41cb7be6c52
#
# It is also possible to completely kill a command by renaming it into
# an empty string:
#
# rename-command KEYS ""
# The key-value size may so be quite different in many scenes, and use 256MiB as SST file size
# may cause data loading(large index/filter block) ineffective when the key-value was too small.
# kvrocks supports user-defined SST file in config(rocksdb.target_file_size_base),
# but it still too trivial and inconvenient to adjust the different sizes for different instances.
# so we want to periodic auto-adjust the SST size in-flight with user avg key-value size.
#
# If enabled, kvrocks will auto resize rocksdb.target_file_size_base
# and rocksdb.write_buffer_size in-flight with user avg key-value size.
# Please see #118.
#
# Default: yes
auto-resize-block-and-sst yes
################################ MIGRATE #####################################
# If the network bandwidth is completely consumed by the migration task,
# it will affect the availability of kvrocks. To avoid this situation,
# migrate-speed is adpoted to limit the migrating speed.
# Migrating speed is limited by controling the duraiton between sending data,
# the duation is calculated by: 1000000 * migrate-pipeline-size / migrate-speed (us).
# Value: [0,INT_MAX], 0 means no limit
#
# Default: 4096
migrate-speed 4096
# In order to reduce data transimission times and improve the efficiency of data migration,
# pipeline is adopted to send multiple data at once. Pipeline size can be set by this option.
# Value: [1, INT_MAX], it can't be 0
#
# Default: 16
migrate-pipeline-size 16
# In order to reduce the write forbidden time during migrating slot, we will migrate the incremetal
# data sevral times to reduce the amount of incremetal data. Until the quantity of incremetal
# data is reduced to a certain threshold, slot will be forbidden write. The threshold is set by
# this option.
# Value: [1, INT_MAX], it can't be 0
#
# Default: 10000
migrate-sequence-gap 10000
################################ ROCKSDB #####################################
# Specify the capacity of metadata column family block cache. Larger block cache
# may make request faster while more keys would be cached. Max Size is 200*1024.
# Default: 2048MB
rocksdb.metadata_block_cache_size 2048
# Specify the capacity of subkey column family block cache. Larger block cache
# may make request faster while more keys would be cached. Max Size is 200*1024.
# Default: 2048MB
rocksdb.subkey_block_cache_size 2048
# Metadata column family and subkey column family will share a single block cache
# if set 'yes'. The capacity of shared block cache is
# metadata_block_cache_size + subkey_block_cache_size
#
# Default: yes
rocksdb.share_metadata_and_subkey_block_cache yes
# A global cache for table-level rows in RocksDB. If almost always point
# lookups, enlarging row cache may improve read performance. Otherwise,
# if we enlarge this value, we can lessen metadata/subkey block cache size.
#
# Default: 0 (disabled)
rocksdb.row_cache_size 0
# Number of open files that can be used by the DB. You may need to
# increase this if your database has a large working set. Value -1 means
# files opened are always kept open. You can estimate number of files based
# on target_file_size_base and target_file_size_multiplier for level-based
# compaction. For universal-style compaction, you can usually set it to -1.
# Default: 4096
rocksdb.max_open_files 8096
# Amount of data to build up in memory (backed by an unsorted log
# on disk) before converting to a sorted on-disk file.
#
# Larger values increase performance, especially during bulk loads.
# Up to max_write_buffer_number write buffers may be held in memory
# at the same time,
# so you may wish to adjust this parameter to control memory usage.
# Also, a larger write buffer will result in a longer recovery time
# the next time the database is opened.
#
# Note that write_buffer_size is enforced per column family.
# See db_write_buffer_size for sharing memory across column families.
# default is 64MB
rocksdb.write_buffer_size 64
# Target file size for compaction, target file size for Leve N can be caculated
# by target_file_size_base * (target_file_size_multiplier ^ (L-1))
#
# Default: 128MB
rocksdb.target_file_size_base 128
# The maximum number of write buffers that are built up in memory.
# The default and the minimum number is 2, so that when 1 write buffer
# is being flushed to storage, new writes can continue to the other
# write buffer.
# If max_write_buffer_number > 3, writing will be slowed down to
# options.delayed_write_rate if we are writing to the last write buffer
# allowed.
rocksdb.max_write_buffer_number 4
# Maximum number of concurrent background compaction jobs, submitted to
# the default LOW priority thread pool.
rocksdb.max_background_compactions 4
# Maximum number of concurrent background memtable flush jobs, submitted by
# default to the HIGH priority thread pool. If the HIGH priority thread pool
# is configured to have zero threads, flush jobs will share the LOW priority
# thread pool with compaction jobs.
rocksdb.max_background_flushes 4
# This value represents the maximum number of threads that will
# concurrently perform a compaction job by breaking it into multiple,
# smaller ones that are run simultaneously.
# Default: 2 (i.e. no subcompactions)
rocksdb.max_sub_compactions 2
# In order to limit the size of WALs, RocksDB uses DBOptions::max_total_wal_size
# as the trigger of column family flush. Once WALs exceed this size, RocksDB
# will start forcing the flush of column families to allow deletion of some
# oldest WALs. This config can be useful when column families are updated at
# non-uniform frequencies. If there's no size limit, users may need to keep
# really old WALs when the infrequently-updated column families hasn't flushed
# for a while.
#
# In kvrocks, we use multiple column families to store metadata, subkeys, etc.
# If users always use string type, but use list, hash and other complex data types
# infrequently, there will be a lot of old WALs if we don't set size limit
# (0 by default in rocksdb), because rocksdb will dynamically choose the WAL size
# limit to be [sum of all write_buffer_size * max_write_buffer_number] * 4 if set to 0.
#
# Moreover, you should increase this value if you already set rocksdb.write_buffer_size
# to a big value, to avoid influencing the effect of rocksdb.write_buffer_size and
# rocksdb.max_write_buffer_number.
#
# default is 512MB
rocksdb.max_total_wal_size 512
# We impl the repliction with rocksdb WAL, it would trigger full sync when the seq was out of range.
# wal_ttl_seconds and wal_size_limit_mb would affect how archived logswill be deleted.
# If WAL_ttl_seconds is not 0, then WAL files will be checked every WAL_ttl_seconds / 2 and those that
# are older than WAL_ttl_seconds will be deleted#
#
# Default: 3 Hours
rocksdb.wal_ttl_seconds 10800
# If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
# WAL files will be checked every 10 min and if total size is greater
# then WAL_size_limit_MB, they will be deleted starting with the
# earliest until size_limit is met. All empty files will be deleted
# Default: 16GB
rocksdb.wal_size_limit_mb 16384
# Approximate size of user data packed per block. Note that the
# block size specified here corresponds to uncompressed data. The
# actual size of the unit read from disk may be smaller if
# compression is enabled.
#
# Default: 4KB
rocksdb.block_size 16384
# Indicating if we'd put index/filter blocks to the block cache
#
# Default: no
rocksdb.cache_index_and_filter_blocks yes
# Specify the compression to use. Only compress level greater
# than 2 to improve performance.
# Accept value: "no", "snappy"
# default snappy
#rocksdb.compression snappy
# If non-zero, we perform bigger reads when doing compaction. If you're
# running RocksDB on spinning disks, you should set this to at least 2MB.
# That way RocksDB's compaction is doing sequential instead of random reads.
# When non-zero, we also force new_table_reader_for_compaction_inputs to
# true.
#
# Default: 2 MB
rocksdb.compaction_readahead_size 2097152
# he limited write rate to DB if soft_pending_compaction_bytes_limit or
# level0_slowdown_writes_trigger is triggered.
# If the value is 0, we will infer a value from `rater_limiter` value
# if it is not empty, or 16MB if `rater_limiter` is empty. Note that
# if users change the rate in `rate_limiter` after DB is opened,
# `delayed_write_rate` won't be adjusted.
#
rocksdb.delayed_write_rate 0
# If enable_pipelined_write is true, separate write thread queue is
# maintained for WAL write and memtable write.
#
# Default: no
rocksdb.enable_pipelined_write no
# Soft limit on number of level-0 files. We start slowing down writes at this
# point. A value <0 means that no writing slow down will be triggered by
# number of files in level-0.
#
# Default: 20
rocksdb.level0_slowdown_writes_trigger 20
# Maximum number of level-0 files. We stop writes at this point.
#
# Default: 40
rocksdb.level0_stop_writes_trigger 40
# Number of files to trigger level-0 compaction.
#
# Default: 4
rocksdb.level0_file_num_compaction_trigger 4
# if not zero, dump rocksdb.stats to LOG every stats_dump_period_sec
#
# Default: 0
rocksdb.stats_dump_period_sec 0
# if yes, the auto compaction would be disabled, but the manual compaction remain works
#
# Default: no
rocksdb.disable_auto_compactions no
# BlobDB(key-value separation) is essentially RocksDB for large-value use cases.
# Since 6.18.0, The new implementation is integrated into the RocksDB core.
# When set, large values (blobs) are written to separate blob files, and only
# pointers to them are stored in SST files. This can reduce write amplification
# for large-value use cases at the cost of introducing a level of indirection
# for reads. Please see: https://github.com/facebook/rocksdb/wiki/BlobDB.
#
# Note that when enable_blob_files is set to yes, BlobDB-related configuration
# items will take effect.
#
# Default: no
rocksdb.enable_blob_files no
# The size of the smallest value to be stored separately in a blob file. Values
# which have an uncompressed size smaller than this threshold are stored alongside
# the keys in SST files in the usual fashion.
#
# Default: 4096 byte, 0 means that all values are stored in blob files
rocksdb.min_blob_size 4096
# The size limit for blob files. When writing blob files, a new file is
# opened once this limit is reached.
#
# Default: 128 M
rocksdb.blob_file_size 128
# Enables garbage collection of blobs. Valid blobs residing in blob files
# older than a cutoff get relocated to new files as they are encountered
# during compaction, which makes it possible to clean up blob files once
# they contain nothing but obsolete/garbage blobs.
# See also rocksdb.blob_garbage_collection_age_cutoff below.
#
# Default: yes
rocksdb.enable_blob_garbage_collection yes
# The percentage cutoff in terms of blob file age for garbage collection.
# Blobs in the oldest N blob files will be relocated when encountered during
# compaction, where N = (garbage_collection_cutoff/100) * number_of_blob_files.
# Note that this value must belong to [0, 100].
#
# Default: 25
rocksdb.blob_garbage_collection_age_cutoff 25
################################ NAMESPACE #####################################
# namespace.test change.me

22
install_server_kvrocks.sh Executable file
View File

@ -0,0 +1,22 @@
#!/bin/bash
set -e
set -x
sudo apt-get install python3-pip virtualenv screen -y
if [ -z "$VIRTUAL_ENV" ]; then
virtualenv -p python3 PDNSENV
echo export PDNS_HOME=$(pwd) >> ./PDNSENV/bin/activate
. ./PDNSENV/bin/activate
fi
python3 -m pip install -r requirements
# REDIS #
mkdir -p db
test ! -d kvrocks/ && git clone https://github.com/apache/incubator-kvrocks.git kvrocks
pushd kvrocks/
git checkout 2.0
make -j4
popd

View File

@ -12,7 +12,14 @@ fi
screen -dmS "pdns" screen -dmS "pdns"
sleep 0.1 sleep 0.1
screen -S "pdns" -X screen -t "pdns-lookup-redis" bash -c "(${DIR}/redis/src/redis-server ${DIR}/etc/redis.conf); read x;" if [ -e "${DIR}/redis" ]; then
screen -S "pdns" -X screen -t "pdns-lookup-redis" bash -c "(${DIR}/redis/src/redis-server ${DIR}/etc/redis.conf); read x;"
fi
if [ -e "${DIR}/kvrocks" ]; then
screen -S "pdns" -X screen -t "pdns-lookup-kvrocks" bash -c "(${DIR}/kvrocks/src/kvrocks -c ${DIR}/etc/kvrocks.conf); read x;"
fi
screen -S "pdns" -X screen -t "pdns-cof" bash -c "(cd bin; ${ENV_PY} ./pdns-cof-server.py; read x;)" screen -S "pdns" -X screen -t "pdns-cof" bash -c "(cd bin; ${ENV_PY} ./pdns-cof-server.py; read x;)"
screen -S "pdns" -X screen -t "pdns-ingester" bash -c "(cd bin; ${ENV_PY} ./pdns-ingestion.py; read x;)" screen -S "pdns" -X screen -t "pdns-ingester" bash -c "(cd bin; ${ENV_PY} ./pdns-ingestion.py; read x;)"