Purpose
This statement is used to create a new table in the database.
Syntax
CREATE [hint_options] [TEMPORARY] TABLE [IF NOT EXISTS] table_name
(table_definition_list) [table_option_list] [partition_option] [table_column_group_option] [IGNORE | REPLACE] [AS] select;
CREATE [TEMPORARY] TABLE [IF NOT EXISTS] table_name
LIKE table_name;
table_definition_list:
table_definition [, table_definition ...]
table_definition:
column_definition_list
| [CONSTRAINT [constraint_name]] PRIMARY KEY index_desc
| [CONSTRAINT [constraint_name]] UNIQUE {INDEX | KEY}
[index_name] index_desc
| [CONSTRAINT [constraint_name]] FOREIGN KEY
[index_name] index_desc
REFERENCES reference_definition
[match_action][opt_reference_option_list]
| [FULLTEXT] {INDEX | KEY} [index_name] [index_type] (key_part,...) [WITH PARSER tokenizer_option] [PARSER_PROPERTIES[=](parser_properties_list)] [FTS_INDEX_TYPE [=] {MATCH | PHRASE_MATCH}]
[index_option_list] [index_column_group_option]
| SEARCH INDEX [index_name] (search_index_column [, search_index_column ...])
| index_json_clause
| [CONSTRAINT [constraint_name]] CHECK(expression) constraint_state
column_definition_list:
column_definition [, column_definition ...]
column_definition:
column_name data_type
[DEFAULT const_value] [AUTO_INCREMENT]
[NULL | NOT NULL] [[PRIMARY] KEY] [UNIQUE [KEY]] [COMMENT string_value] [SKIP_INDEX(skip_index_option_list)]
| column_name data_type
[GENERATED ALWAYS] AS (expr) [VIRTUAL | STORED]
[opt_generated_column_attribute]
skip_index_option_list:
skip_index_option [,skip_index_option ...]
skip_index_option:
MIN_MAX
| SUM
index_desc:
(column_desc_list) [index_type] [index_option_list]
match_action:
MATCH {SIMPLE | FULL | PARTIAL}
opt_reference_option_list:
reference_option [,reference_option ...]
reference_option:
ON {DELETE | UPDATE} {RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT}
tokenizer_option:
SPACE
| NGRAM
| BENG
| IK
| NGRAM2
parser_properties_list:
parser_properties [, parser_properties ...]
parser_properties:
min_token_size = int_value
| max_token_size = int_value
| ngram_token_size = int_value
| ik_mode = 'char_value'
| min_ngram_size = int_value
| max_ngram_size = int_value
key_part:
{index_col_name [(length)] | (expr)} [ASC | DESC]
search_index_column:
column_name [WITH (search_col_option_list)]
search_col_option_list:
[INCLUDE_PATHS = ('path1'[, 'path2']...)]
[| EXCLUDE_PATHS = ('path1'[, 'path2']...)]
[| INCLUDE_TYPES = (type1[, type2]...)]
index_type:
USING BTREE
index_option_list:
index_option [ index_option ...]
index_option:
[GLOBAL | LOCAL]
| block_size
| compression
| STORING(column_name_list)
| COMMENT string_value
| STORAGE_CACHE_POLICY(storage_cache_policy_option)
table_option_list:
table_option [ table_option ...]
table_option:
[DEFAULT] {CHARSET | CHARACTER SET} [=] charset_name
| [DEFAULT] COLLATE [=] collation_name
| table_tablegroup
| block_size
| lob_inrow_threshold [=] num
| compression
| AUTO_INCREMENT [=] INT_VALUE
| COMMENT string_value
| ROW_FORMAT [=] {REDUNDANT | COMPACT | DYNAMIC | COMPRESSED | DEFAULT}
| PCTFREE [=] num
| parallel_clause
| DUPLICATE_SCOPE [=] 'none|cluster'
| TABLE_MODE [=] 'table_mode_value'
| auto_increment_cache_size [=] INT_VALUE
| READ {ONLY | WRITE}
| ORGANIZATION [=] {INDEX | HEAP}
| enable_macro_block_bloom_filter [=] {True | False}
| DYNAMIC_PARTITION_POLICY [=] (dynamic_partition_policy_list)
| SEMISTRUCT_ENCODING_TYPE [=] 'encoding' # Deprecated from V4.4.1. Use SEMISTRUCT_PROPERTIES instead.
| MICRO_BLOCK_FORMAT_VERSION [=] {1|2}
| STORAGE_CACHE_POLICY (storage_cache_policy_option)
| CLUSTER BY (column_name_list)
| HMS_CATALOG_NAME [=] string_value
| COLUMN_NAME_CASE_SENSITIVE [=] {True | False}
| MERGE_ENGINE = {delete_insert | partial_update | append_only}
| TTL [=] col_name + INTERVAL interval_num ttl_unit BY COMPACTION
| DELTA_FORMAT [=] 'flat | encoding'
| SKIP_INDEX_LEVEL [=] {1 | 0}
parallel_clause:
{NOPARALLEL | PARALLEL integer}
table_mode_value:
NORMAL
| QUEUING
| MODERATE
| SUPER
| EXTREME
dynamic_partition_policy_list:
dynamic_partition_policy_option [, dynamic_partition_policy_option ...]
dynamic_partition_policy_option:
ENABLE = {true | false}
| TIME_UNIT = {'hour' | 'day' | 'week' | 'month' | 'year'}
| PRECREATE_TIME = {'-1' | '0' | 'n {hour | day | week | month | year}'}
| EXPIRE_TIME = {'-1' | '0' | 'n {hour | day | week | month | year}'}
| TIME_ZONE = {'default' | 'time_zone'}
| BIGINT_PRECISION = {'none' | 'us' | 'ms' | 's'}
partition_option:
PARTITION BY HASH(expression)
[subpartition_option] PARTITIONS partition_count
| PARTITION BY KEY([column_name_list])
[subpartition_option] PARTITIONS partition_count
| PARTITION BY RANGE {(expression) | COLUMNS (column_name_list)}
[subpartition_option] (range_partition_list) [STORAGE_CACHE_POLICY = {"hot" | "auto" | "cold" | "none"}]
| PARTITION BY LIST {(expression) | COLUMNS (column_name_list)}
[subpartition_option] PARTITIONS partition_count
| PARTITION BY RANGE [COLUMNS]([column_name_list]) [SIZE('size_value')] (range_partition_list)
subpartition_option:
SUBPARTITION BY HASH(expression)
SUBPARTITIONS subpartition_count
| SUBPARTITION BY KEY(column_name_list)
SUBPARTITIONS subpartition_count
| SUBPARTITION BY RANGE {(expression) | COLUMNS (column_name_list)}
(range_subpartition_list) [STORAGE_CACHE_POLICY = {"hot" | "auto" | "cold" | "none"}]
| SUBPARTITION BY LIST(expression)
storage_cache_policy_option:
GLOBAL = {"hot" | "auto" | "cold" | "none"}
| timeline_strategy_list
timeline_strategy_list:
BOUNDARY_COLUMN = column_name
| BOUNDARY_COLUMN_UNIT = {"s" | "ms"}
| HOT_RETENTION = intnum retention_time_unit
| MIXED_RETENTION = intnum retention_time_unit
| GRANULARITY = {"partition" | "block"}
retention_time_unit:
YEAR
| MONTH
| WEEK
| DAY
| HOUR
| MINUTE
range_partition_list:
range_partition [, range_partition ...]
range_partition:
PARTITION partition_name
VALUES LESS THAN {(expression_list) | MAXVALUE}
[STORAGE_CACHE_POLICY = {"hot" | "auto" | "cold" | "none"}]
range_subpartition_list:
range_subpartition [, range_subpartition ...]
range_subpartition:
SUBPARTITION subpartition_name
VALUES LESS THAN {(expression_list) | MAXVALUE}
[STORAGE_CACHE_POLICY = {"hot" | "auto" | "cold" | "none"}]
expression_list:
expression [, expression ...]
column_name_list:
column_name [, column_name ...]
partition_name_list:
partition_name [, partition_name ...]
partition_count | subpartition_count:
INT_VALUE
table_column_group_option/index_column_group_option:
WITH COLUMN GROUP(all columns)
| WITH COLUMN GROUP(each column)
| WITH COLUMN GROUP(all columns, each column)
index_json_clause:
[UNIQUE] INDEX idx_json_name((CAST(json_column_name->'$.json_field_name' AS UNSIGNED ARRAY)))
| INDEX idx_json_name(column_name, [column_name, ...] (CAST(json_column_name->'$.json_field_name' AS CHAR(n) ARRAY)))
Parameters
| Parameter | Description |
|---|---|
| hint_options | Optional. Specifies the hint options. You can manually specify the bypass import hint, including APPEND, DIRECT, and NO_DIRECT. The corresponding hint format is /*+ [APPEND | DIRECT(need_sort,max_error,load_type)] parallel(N) |NO_DIRECT */. For more information about bypass importing data by using the CREATE TABLE AS SELECT statement, see the Use CREATE TABLE AS SELECT to bypass import data section in Full bypass import. |
| TEMPORARY | Optional. Indicates that the table is a temporary table.
Note |
| IF NOT EXISTS | If you specify IF NOT EXISTS, the system does not return an error even if the table to be created already exists. If you do not specify this clause and the table to be created already exists, the system returns an error. |
| IGNORE | REPLACE | Optional. Specifies how to handle rows with duplicate unique key values when you use the CREATE TABLE ... SELECT statement. If you do not specify IGNORE or REPLACE, an error is returned when there are rows with duplicate unique key values.
|
| PRIMARY KEY | Optional. Specifies the primary key of the table. If you do not specify this clause, a hidden primary key is used. OceanBase Database allows you to modify the primary key of a table or add a primary key to a table by using the ALTER TABLE statement. For more information, see ALTER TABLE. |
| FOREIGN KEY | Optional. Specifies the foreign key of the table. If you do not specify the foreign key name, the system generates the foreign key name by concatenating the table name, OBFK, and the creation time. For example, the foreign key name created on t1 at 00:00:00 on August 1, 2021 is t1_OBFK_1627747200000000. A foreign key allows cross-table references. When you perform an UPDATE or DELETE operation on a key value in the parent table that matches a row in the child table, the result depends on the ON UPDATE and ON DELETE clauses:
SET DEFAULT operation is supported. |
| FULLTEXT | Optional. Specifies to create a full-text index. For more information, see the Create a full-text index section in Create an index.
Notice: This version supports only local full-text indexes. |
| SEARCH INDEX | Optional. Specifies to create a search index (Search Index). You can declare the search index column by column in SEARCH INDEX [index_name] (search_index_column [, search_index_column ...]). search_index_column supports column_name [WITH (...)]. WITH is a column-level option that takes effect only on JSON columns. It supports INCLUDE_PATHS, EXCLUDE_PATHS (only one of them can be specified), and INCLUDE_TYPES. For more information, see the Create a search index section in Create an index.
Notice |
| WITH PARSER tokenizer_option | Optional. Specifies the tokenizer for the full-text index. |
| PARSER_PROPERTIES[=](parser_properties_list) | Optional. Specifies the properties of the tokenizer. |
| FTS_INDEX_TYPE [=] {MATCH | PHRASE_MATCH} | Optional. Specifies the type of the full-text index. MATCH specifies a term-matching index, which supports matching and scoring based on the BM25 algorithm and stores the token, document ID, term frequency, and document length. PHRASE_MATCH specifies a phrase-matching index, which is a superset of the MATCH index and supports phrase queries more efficiently. It also stores the position list of the terms. |
| KEY | INDEX | Specifies a key or index for the table. If you do not specify an index name, the name of the first column referenced by the index is used as the index name. If that name is already in use, an underscore ( _ ) and a sequence number are appended to it. For example, if an index named c1 already exists, another unnamed index whose first column is c1 is named c1_2. You can use the SHOW INDEX statement to view the indexes on the table. |
| key_part | Create a function index. |
| index_col_name | Specify the column name for the index. ASC (ascending) is supported after each column name, but DESC (descending) is not. The default is ascending. The sorting method for the index is as follows: first, sort by the value of the first column in index_col_name; for records with the same value in this column, sort by the value of the next column name; and so on. |
| expr | Specify a valid function index expression, which can be a boolean expression, such as c1=c1.
Notice: OceanBase Database does not allow the creation of function indexes on generated columns in the current version. |
| ROW_FORMAT | Specify whether to enable the Encoding storage format for the table.
|
| [GENERATED ALWAYS] AS (expr) [VIRTUAL | STORED] | Create a generated column. expr is the expression used to calculate the column value.
|
| BLOCK_SIZE | Specify the microblock size for the table. |
| lob_inrow_threshold | Configure the INROW threshold for LOB fields. If the data size of a LOB field exceeds this threshold, the excess data will be stored as OUTROW in the LOB Meta table. The default value is controlled by the variable ob_default_lob_inrow_threshold. |
| COMPRESSION | Specify the compression algorithm for the table. Valid values:
|
| CHARSET | CHARACTER SET | Specify the default character set for columns in the table. For more information about the supported character sets, see Character set. |
| COLLATE | Specify the default collation for columns in the table. For more information about the supported collations, see Collation. |
| table_tablegroup | Specify the tablegroup to which the table belongs. |
| AUTO_INCREMENT | Specify the initial value for the auto-increment column in the table. OceanBase Database supports using an auto-increment column as a partitioning key. |
| COMMENT | Comments. Not case-sensitive. |
| PCTFREE | Specify the percentage of space reserved for macroblocks. |
| parallel_clause | Specify the parallelism level for the table:
|
| DUPLICATE_SCOPE | Specify the replication attribute for the table. Valid values:
cluster-level replication tables. |
| CHECK | Limit the value range of the column.
You can view constraint information by using the SHOW CREATE TABLE command, the information_schema.TABLE_CONSTRAINTS view, or the information_schema.CHECK_CONSTRAINTS view. |
| constraint_name | The constraint name, which can be up to 64 characters in length.
|
| expression | The constraint expression.
|
| table_column_group_option/index_column_group_option | Specifies the column storage options for a table or index. The following table describes the options.
|
| SKIP_INDEX | Specifies the Skip Index attribute of a column. Valid values:
Notice
|
| index_json_clause | Specifies the clause for creating a multi-value index. For more information about multi-value indexes, see the Create a multi-value index section in Create an index.
Notice: The multi-value index feature is currently in the experimental stage. We recommend that you do not use it in a production environment to avoid affecting system stability. |
| json_column_name | Specifies the name of the JSON column in the table. |
| idx_json_name | Specifies the name of the multi-value index to be created. |
| json_field_name | Specifies the name of the field in the JSON column to be indexed. |
| TABLE_MODE | Optional. Specifies the merge trigger threshold and merge strategy, which control the merge behavior after data is dumped. For more information about the values, see table_mode_value below. |
| auto_increment_cache_size | Optional. Specifies the number of cached auto-increment values. The default value is 0, indicating that this parameter is not configured. In this case, the tenant-level parameter auto_increment_cache_size is used as the cache size for auto-increment columns. |
| READ {ONLY | WRITE} | Specifies the read/write permission for a table. Valid values:
|
| ORGANIZATION [=] {INDEX | HEAP} | Specifies the storage order of data rows in a table, that is, the table organization mode. Valid values:
ORGANIZATION option, its value is the same as the value of the default_table_organization parameter. |
| enable_macro_block_bloom_filter [=] {True | False} | Specifies whether to persist the bloom filter at the macroblock level. Valid values:
|
| DYNAMIC_PARTITION_POLICY [=] (dynamic_partition_policy_list) | Specifies the dynamic partition management attribute of a table, enabling automatic creation and deletion of partitions. dynamic_partition_policy_list is a list of configurable parameters for dynamic partition policies, with each parameter separated by a comma. For more information, see dynamic_partition_policy_option below. |
| PARTITION BY RANGE [COLUMNS]([column_name_list]) [SIZE('size_value')] | Specifies the creation of an automatic partition table. For more information, see the automatic partitioning syntax when creating a table in Automatic partition splitting. |
| SEMISTRUCT_ENCODING_TYPE | Optional. Specifies the encoding type for semi-structured data. Valid values:
Notice: Starting from V4.4.1, SEMISTRUCT_ENCODING_TYPE is deprecated. Use SEMISTRUCT_PROPERTIES instead. |
| SEMISTRUCT_PROPERTIES | Optional. Specifies the encoding type of the semi-structured data in key-value pairs. Valid values:
For more information, see Use semi-structured encoding. Notice: This parameter is supported starting from OceanBase Database V4.4.1. |
| MICRO_BLOCK_FORMAT_VERSION | Optional. Specifies the microblock storage format version of the table. Valid values: [1,+∞)
Note: This parameter is introduced in OceanBase Database V4.4.1. |
| STORAGE_CACHE_POLICY | Optional. Specifies the cold/hot storage strategy for tables, partitions, and indexes in shared storage mode. Indicates whether to cache the specified data from object storage to the local cloud disk. If not specified, the system will adaptively cache the data. For more information, see storage_cache_policy. |
| HMS_CATALOG_NAME [=] string_value | Specifies the catalog name to access when connecting to Hive Metastore Service (HMS). Note: This parameter is introduced in OceanBase Database V4.5.0. |
| CLUSTER BY (column_name_list) | Specifies the clustering column. For more information, see Specify the clustering column when you create a table in Create a table.
NoticeIn a MySQL tenant of OceanBase Database, if you do not explicitly specify the table organization type ( NoteThis parameter is introduced in OceanBase Database V4.6.0. |
| COLUMN_NAME_CASE_SENSITIVE [=] {True | False} | Specifies whether to enable case sensitivity for column names.
|
| DELTA_FORMAT [=] 'flat | encoding' | Specifies the storage format of incremental data. Valid values:
default_delta_format parameter to adjust the default storage format of incremental data for tables, without specifying it when creating tables. For more information, see default_delta_format. |
| SKIP_INDEX_LEVEL [=] {1 | 0} | Specifies whether to generate Skip Index aggregation information for incremental SSTables based on the baseline behavior. Valid values:
SKIP_INDEX_LEVEL parameter, the system will use the value of the tenant-level parameter default_skip_index_level to determine the default value of SKIP_INDEX_LEVEL. For more information, see default_skip_index_level. |
| MERGE_ENGINE = {delete_insert | partial_update | append_only} | Optional. Specifies the update model for the table. Valid values:
MERGE_ENGINE option, its value is the same as the value of the default_table_merge_engine configuration item.
NoteAfter you specify the |
| TTL [=] col_name + INTERVAL interval_num ttl_unit BY COMPACTION | Specifies the TTL policy for the table. When you create a TTL table, the update model MERGE_ENGINE can only be append_only or delete_insert. The specific explanations are as follows:
|
table_mode_value
Note
Among the following TABLE_MODE values, every mode except NORMAL designates a QUEUING table. QUEUING is the most basic of these modes; MODERATE, SUPER, and EXTREME apply progressively more aggressive compaction strategies.
- NORMAL: The default value. In this mode, the probability of triggering a major compaction after a data dump is very low.
- QUEUING: In this mode, the probability of triggering a major compaction after a data dump is low.
- MODERATE: In this mode, the probability of triggering a major compaction after a data dump is moderate.
- SUPER: In this mode, the probability of triggering a major compaction after a data dump is high.
- EXTREME: In this mode, the probability of triggering a major compaction after a data dump is very high.
For more information about major compactions, see Adaptive major compaction.
tokenizer_option
- SPACE: The default value, which indicates that the tokenizer splits text by spaces. You can specify the following parameters:

  | Parameter | Value range |
  |---|---|
  | min_token_size | [1, 16] |
  | max_token_size | [10, 84] |

- NGRAM: Indicates that the tokenizer splits text by N-Grams (Chinese). You can specify the following parameters:

  | Parameter | Value range |
  |---|---|
  | ngram_token_size | [1, 10] |

- NGRAM2: Indicates that the tokenizer splits text into consecutive characters in the range of min_ngram_size to max_ngram_size. You can specify the following parameters:

  | Parameter | Value range |
  |---|---|
  | min_ngram_size | [1, 16] |
  | max_ngram_size | [1, 16] |

- BENG: Indicates that the tokenizer splits text by Beng (basic English). You can specify the following parameters:

  | Parameter | Value range |
  |---|---|
  | min_token_size | [1, 16] |
  | max_token_size | [10, 84] |

- IK: Indicates that the tokenizer splits text by IK (Chinese). Currently, only the utf-8 character set is supported. You can specify the following parameters:

  | Parameter | Value range |
  |---|---|
  | ik_mode | smart, max_word |
parser_properties
- min_token_size: The minimum token length. Default value: 3. Value range: 1 to 16.
- max_token_size: The maximum token length. Default value: 84. Value range: 10 to 84.
- ngram_token_size: The token length for the NGRAM tokenizer. This parameter is valid only for the NGRAM tokenizer. Default value: 2. Value range: 1 to 10.
- ik_mode: The tokenization mode of the IK tokenizer. Valid values:
  - smart: Default value. The dictionary is used to improve the accuracy of tokenization. The boundaries of the dictionary words are prioritized, which may reduce unnecessary extensions.
  - max_word: The dictionary is used to identify the words defined in the dictionary. However, the maximum extension of tokenization is not affected. Even if the dictionary defines words, the max_word mode will still attempt to split the text into more words.
- min_ngram_size: The minimum token length. Value range: [1, 16].
- max_ngram_size: The maximum token length. Value range: [1, 16].
Here is an example:
CREATE TABLE tbl1 (col1 VARCHAR(200), col2 TEXT,
FULLTEXT INDEX fidx(col2) WITH PARSER ik PARSER_PROPERTIES=(ik_mode='max_word'));
dynamic_partition_policy_option
ENABLE = {true | false}: Optional. Specifies whether to enable dynamic partition management. This parameter can be modified. Valid values:
- true: Default value. Indicates that dynamic partition management is enabled.
- false: Indicates that dynamic partition management is disabled.
TIME_UNIT = {'hour' | 'day' | 'week' | 'month' | 'year'}: Required. Specifies the time unit for partitioning, that is, the interval for automatically creating partition boundaries. This parameter cannot be modified. Valid values:
- hour: Partitions are created by hour.
- day: Partitions are created by day.
- week: Partitions are created by week.
- month: Partitions are created by month.
- year: Partitions are created by year.
PRECREATE_TIME = {'-1' | '0' | 'n {hour | day | week | month | year}'}: Optional. Specifies the precreation time. When dynamic partition management is scheduled, partitions are precreated so that maximum partition upper bound > now() + precreate_time. This parameter can be modified. Valid values:
- -1: Default value. Indicates that no partitions are precreated.
- 0: Indicates that only the current partition is precreated.
- n {hour | day | week | month | year}: Indicates the time span for precreating partitions. For example, 3 hour indicates that partitions are precreated for 3 hours.
Note
- When multiple partitions are to be precreated, the partition boundary interval is TIME_UNIT.
- The first precreated partition boundary is the ceiling of the existing maximum partition boundary in TIME_UNIT.
EXPIRE_TIME = {'-1' | '0' | 'n {hour | day | week | month | year}'}: Optional. Specifies the partition expiration time. When dynamic partition management is scheduled, all partitions with upper bounds < now() - expire_time are deleted. This parameter can be modified. Valid values:
- -1: Default value. Indicates that partitions never expire.
- 0: Indicates that all partitions except the current one expire.
- n {hour | day | week | month | year}: Indicates the partition expiration time. For example, 1 day indicates that partitions expire after 1 day.
TIME_ZONE = {'default' | 'time_zone'}: Optional. Specifies the time zone used to determine the current time and to interpret partition key values of the date, datetime, and year types. This parameter cannot be modified. Valid values:
- default: Default value. Indicates that no time zone is specified and the tenant's time zone is used. For partition keys of other types, TIME_ZONE must be default.
- time_zone: Indicates a custom time zone offset. For example, +8:00.
BIGINT_PRECISION = {'none' | 'us' | 'ms' | 's'}: Optional. Specifies the timestamp precision of the bigint type partition key. This parameter cannot be modified. Valid values:
- none: Default value. Indicates that no precision is specified (the partition key is not of the bigint type).
- us: Indicates microsecond precision.
- ms: Indicates millisecond precision.
- s: Indicates second precision.
For more information about how to create a dynamic partition table, see Create a dynamic partition table.
Here is an example:
CREATE TABLE tbl2 (col1 INT, col2 DATETIME)
DYNAMIC_PARTITION_POLICY(
ENABLE = true,
TIME_UNIT = 'hour',
PRECREATE_TIME = '3 hour',
EXPIRE_TIME = '1 day',
TIME_ZONE = '+8:00',
BIGINT_PRECISION = 'none')
PARTITION BY RANGE COLUMNS (col2)(
PARTITION P0 VALUES LESS THAN ('2024-11-11 13:30:00')
);
storage_cache_policy
Create a table-level storage cache policy
STORAGE_CACHE_POLICY is an optional parameter. If you do not specify the STORAGE_CACHE_POLICY parameter, the value of the tenant-level configuration parameter default_storage_cache_policy is used as the default value. The attributes in storage_cache_policy_option are defined in the Key-Value format. The semantics of each attribute are as follows:
GLOBAL = {"hot" | "auto" | "cold" | "none"}: specifies the cold/hot storage strategy for the data of the entire table. Valid values:
- hot: specifies that all data in the table is hot data. If the cache disk space is sufficient, all data in the table will be cached to the local cache disk.
- auto: specifies that the hot data of the table is automatically identified by the system.
- cold: specifies that the data of the table is cold data. The data will not be cached to the local cache disk. For data that has been persisted to the cache, the system will actively trigger cache eviction. Subsequent accesses to the table data can be stored in the macroblock memory cache, without being persisted to the macroblock cache and microblock cache.
- none: specifies that the strategy of this index follows the value of the STORAGE_CACHE_POLICY parameter of the main table.
Notice
The none attribute can only be used for indexes.
timeline_strategy_list: specifies the list of time-axis strategy parameters. Parameters are separated by commas. The time-axis strategy supports a mechanism for determining the cold/hot storage of partitions/macroblocks based on time. The system automatically adjusts the data on the local cache disk based on the configured strategy.
Note
A time axis is defined based on the partitioning range. The system caches hot data in the partition range. When a certain time condition is met, the data in the partition is considered hot data. When using the time-axis strategy, note the following:
- Only Range-partitioned tables (Range partitioning can be at the primary or secondary level) are supported. This is because the system needs to determine whether the data has expired based on time. Double Range partitioning is not supported, as it is unclear which Range partition to use for time-based expiration.
- When using the time-axis strategy, the partitioning expression can only contain column names. Expressions are not supported. For example, PARTITION BY RANGE COLUMNS(expr(col3)) is not supported.
- BOUNDARY_COLUMN must be a partitioning key. If the table has multiple partitioning keys, BOUNDARY_COLUMN must be the first column, which is used to determine whether the partition has expired.
BOUNDARY_COLUMN = column_name: specifies the column used to determine hot data. Supported types are integer types (BIGINT or INT, formatted as Unix timestamps) and time types (TIMESTAMP, DATE, DATETIME, or YEAR).
- If the BOUNDARY_COLUMN type is an integer, the table can be partitioned using the Range or Range Columns partitioning method.
- If the BOUNDARY_COLUMN type is a time type, the table can only be partitioned using the Range Columns partitioning method.
BOUNDARY_COLUMN_UNIT = {"s" | "ms"}: specifies the time unit for the BOUNDARY_COLUMN parameter. This parameter is only supported when BOUNDARY_COLUMN is an integer. When an integer value is used as a timestamp, the timestamp unit must be specified to avoid incorrect parsing of the timestamp. Valid values:
- If the partitioning column is of type INT, BOUNDARY_COLUMN_UNIT can only be s.
- If the partitioning column is of type BIGINT, BOUNDARY_COLUMN_UNIT can be s or ms.
Notice
If the stored value is not a Unix timestamp, the system cannot correctly identify the time represented by the integer value.
HOT_RETENTION = intnum retention_time_unit: specifies the time range for hot data.
- intnum: specifies an integer.
- retention_time_unit: specifies the time unit. Valid values:
  - YEAR: specifies years.
  - MONTH: specifies months.
  - WEEK: specifies weeks.
  - DAY: specifies days.
  - HOUR: specifies hours.
  - MINUTE: specifies minutes.
Notice
In the time-axis strategy, at least one of the HOT_RETENTION and MIXED_RETENTION parameters must be specified.
MIXED_RETENTION = intnum retention_time_unit:
- If MIXED_RETENTION is specified, data beyond HOT_RETENTION + MIXED_RETENTION is considered cold data.
- If MIXED_RETENTION is not specified, data beyond HOT_RETENTION is considered auto data.
- If HOT_RETENTION is not specified and only MIXED_RETENTION is specified, data within [0, MIXED_RETENTION] is considered auto data, and data beyond MIXED_RETENTION is considered cold data.
GRANULARITY = {"partition" | "block"}: optional. Specifies the granularity of the cold/hot storage strategy. Valid values:
- partition: specifies that the cold/hot storage strategy is applied at the partition level.
- block: specifies that the cold/hot storage strategy is applied at the macroblock level.
Note
If the table is not partitioned, and you want to manually specify a period of time during which the data is hot, you can specify block. For more information about the macroblock-level cold/hot storage strategy, see Create a macroblock-level cold/hot storage strategy.
Create a partition-level storage cache policy
The STORAGE_CACHE_POLICY option at the partition level is optional. Valid values are as follows:
- hot: Indicates that all data in the specified partition is hot data. If the local cache disk space is sufficient, all data in the partition will be cached on the local cache disk.
- auto: Indicates that the hot data in the specified partition is automatically identified by the system.
- cold: Indicates that the data in the specified partition is cold data. It will not be cached on the local cache disk. For data that has been persisted to the cache, the system will actively trigger cache eviction. Table data that is accessed subsequently will be stored in the macroblock memory cache and will not be persisted to the macroblock cache or microblock cache.
- none: The default value. This indicates that the hot/cold storage strategy for the partition follows the value of STORAGE_CACHE_POLICY at the upper level.
Examples
Create a database table.
obclient> CREATE TABLE tbl1 (c1 INT PRIMARY KEY, c2 VARCHAR(50));
Query OK, 0 rows affected

Create a table with an index.
obclient> CREATE TABLE tbl2 (c1 INT PRIMARY KEY, c2 INT, c3 INT, INDEX i1 (c2)); Query OK, 0 rows affectedCreate a table
tbl3with a function index.obclient> CREATE TABLE tbl3 (c1 INT, c2 INT, INDEX i1 ((c1+1)), UNIQUE KEY ((c1+c2))); Query OK, 0 rows affected obclient> SHOW CREATE TABLE tbl3; +-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Table | Create Table | +-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | tbl3 | CREATE TABLE `tbl3` ( `c1` int(11) DEFAULT NULL, `c2` int(11) DEFAULT NULL, UNIQUE KEY `functional_index` ((`c1` + `c2`)) BLOCK_SIZE 16384 LOCAL, KEY `i1` ((`c1` + 1)) BLOCK_SIZE 16384 LOCAL ) DEFAULT CHARSET = utf8mb4 ROW_FORMAT = DYNAMIC COMPRESSION = 'zstd_1.3.8' REPLICA_NUM = 2 BLOCK_SIZE = 16384 USE_BLOOM_FILTER = FALSE TABLET_SIZE = 134217728 PCTFREE = 0 | +-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+Create a table with 8 hash partitions.
obclient> CREATE TABLE tbl4 (c1 INT PRIMARY KEY, c2 INT) PARTITION BY HASH(c1) PARTITIONS 8;
Query OK, 0 rows affected

Create a table with range partitions as the primary partitions and key partitions as the subpartitions.
obclient> CREATE TABLE tbl5 (c1 INT, c2 INT, c3 INT) PARTITION BY RANGE(c1) SUBPARTITION BY KEY(c2, c3) SUBPARTITIONS 5 (PARTITION p0 VALUES LESS THAN(0), PARTITION p1 VALUES LESS THAN(100));
Query OK, 0 rows affected

Create a table with one column that uses the gbk character set and one column that uses the utf8 character set.
obclient> CREATE TABLE tbl6 (c1 VARCHAR(10), c2 VARCHAR(10) CHARSET GBK COLLATE gbk_bin) DEFAULT CHARSET utf8 COLLATE utf8mb4_general_ci;
Query OK, 0 rows affected

Enable encoding and use
zstd compression, with 5% space reserved for macroblocks.
obclient> CREATE TABLE tbl7 (c1 INT, c2 INT, c3 VARCHAR(64)) COMPRESSION 'zstd_1.0' ROW_FORMAT DYNAMIC PCTFREE 5;
Query OK, 0 rows affected

Create a table tbl8 and set the parallelism to 3.
obclient> CREATE TABLE tbl8(c1 INT PRIMARY KEY, c2 INT) PARALLEL 3;
Query OK, 0 rows affected

Use an auto-increment column as the partition key.
obclient> CREATE TABLE tbl9(inv_id BIGINT NOT NULL AUTO_INCREMENT,c1 BIGINT, PRIMARY KEY (inv_id) ) PARTITION BY HASH(inv_id) PARTITIONS 8;
Query OK, 0 rows affected

Specify a foreign key for the ref_t2 table. When an UPDATE operation affects the key values in the parent table that match the rows in the child table, execute the SET NULL operation.
obclient> CREATE TABLE ref_t1(c1 INT PRIMARY KEY, C2 INT);
Query OK, 0 rows affected
obclient> CREATE TABLE ref_t2(c1 INT PRIMARY KEY, C2 INT,FOREIGN KEY(c2) REFERENCES ref_t1(c1) ON UPDATE SET NULL);
Query OK, 0 rows affected

Create a table
tbl10with aCHECKconstraint and view the constraint information.obclient> CREATE TABLE tbl10 (col1 INT, col2 INT, col3 INT, CONSTRAINT equal_check1 CHECK(col1 = col3 * 2)); Query OK, 0 rows affected obclient> SHOW CREATE TABLE tbl10; +-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Table | Create Table | +-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | tbl10 | CREATE TABLE `tbl10` ( `col1` int(11) DEFAULT NULL, `col2` int(11) DEFAULT NULL, `col3` int(11) DEFAULT NULL, CONSTRAINT `equal_check1` CHECK ((`col1` = (`col3` * 2))) ) DEFAULT CHARSET = utf8mb4 ROW_FORMAT = DYNAMIC COMPRESSION = 'zstd_1.3.8' REPLICA_NUM = 1 BLOCK_SIZE = 16384 USE_BLOOM_FILTER = FALSE TABLET_SIZE = 134217728 PCTFREE = 0 | +-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1 row in setCreate a table
tbl11based on thetbl10table and view the table information.obclient> CREATE TABLE tbl11 LIKE tbl10; Query OK, 0 rows affected obclient> SHOW CREATE TABLE tbl11; +-------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Table | Create Table | +-------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | tbl11 | CREATE TABLE `tbl11` ( `col1` int(11) DEFAULT NULL, `col2` int(11) DEFAULT NULL, `col3` int(11) DEFAULT NULL, CONSTRAINT `tbl11_OBCHECK_1650793233327894` CHECK ((`col1` = (`col3` * 2))) ) DEFAULT CHARSET = utf8mb4 ROW_FORMAT = DYNAMIC COMPRESSION = 'zstd_1.3.8' REPLICA_NUM = 1 BLOCK_SIZE = 16384 USE_BLOOM_FILTER = FALSE TABLET_SIZE = 134217728 PCTFREE = 0 | +-------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1 row in setCreate a
cluster-level replicated table dup_t1.

Log in to the sys tenant and create a unit.
obclient> CREATE RESOURCE UNIT IF NOT EXISTS 2c5g MAX_CPU 2, MEMORY_SIZE '5G';
Query OK, 0 rows affected

Create a resource pool with 2 units.
obclient> CREATE RESOURCE POOL tenant_pool UNIT = '2c5g', UNIT_NUM = 2, ZONE_LIST = ('z1', 'z2', 'z3');
Query OK, 0 rows affected

Create a user tenant obmysql and specify the locality distribution.
obclient> CREATE TENANT obmysql resource_pool_list=('tenant_pool'), LOCALITY = "F@z1, F@z2, R@z3", PRIMARY_ZONE = "z1" SET ob_tcp_invited_nodes='%';
Query OK, 0 rows affected

Log in to the obmysql user tenant created in the previous step and create a replicated table.
obclient> USE test;
Database changed
obclient> CREATE TABLE dup_t1(c1 INT) DUPLICATE_SCOPE = 'cluster';
Query OK, 0 rows affected

(Optional) View the broadcast log stream information. The replicated table will be created on this log stream.
obclient> SELECT * FROM oceanbase.DBA_OB_LS WHERE FLAG LIKE "%DUPLICATE%"; +-------+--------+--------------+---------------+-------------+---------------------+----------+---------------------+---------------------+-----------+ | LS_ID | STATUS | PRIMARY_ZONE | UNIT_GROUP_ID | LS_GROUP_ID | CREATE_SCN | DROP_SCN | SYNC_SCN | READABLE_SCN | FLAG | +-------+--------+--------------+---------------+-------------+---------------------+----------+---------------------+---------------------+-----------+ | 1003 | NORMAL | z1;z2 | 0 | 0 | 1683267390195713284 | NULL | 1683337744205408139 | 1683337744205408139 | DUPLICATE | +-------+--------+--------------+---------------+-------------+---------------------+----------+---------------------+---------------------+-----------+ 1 row in set(Optional) View the replica distribution of the replicated table in the sys tenant. The
REPLICA_TYPEfield indicates the replica type.obclient> SELECT * FROM oceanbase.CDB_OB_TABLE_LOCATIONS WHERE TABLE_NAME = "dup_t1"; +-----------+---------------+------------+----------+------------+----------------+-------------------+------------+---------------+-----------+-------+------+----------------+----------+----------+--------------+-----------------+ | TENANT_ID | DATABASE_NAME | TABLE_NAME | TABLE_ID | TABLE_TYPE | PARTITION_NAME | SUBPARTITION_NAME | INDEX_NAME | DATA_TABLE_ID | TABLET_ID | LS_ID | ZONE | SVR_IP | SVR_PORT | ROLE | REPLICA_TYPE | DUPLICATE_SCOPE | +-----------+---------------+------------+----------+------------+----------------+-------------------+------------+---------------+-----------+-------+------+----------------+----------+----------+--------------+-----------------+ | 1002 | test | dup_t1 | 500002 | USER TABLE | NULL | NULL | NULL | NULL | 200001 | 1003 | z1 | 11.xxx.xxx.xxx | 36125 | LEADER | FULL | CLUSTER | | 1002 | test | dup_t1 | 500002 | USER TABLE | NULL | NULL | NULL | NULL | 200001 | 1003 | z1 | 11.xxx.xxx.xxx | 36124 | FOLLOWER | READONLY | CLUSTER | | 1002 | test | dup_t1 | 500002 | USER TABLE | NULL | NULL | NULL | NULL | 200001 | 1003 | z2 | 11.xxx.xxx.xxx | 36127 | FOLLOWER | FULL | CLUSTER | | 1002 | test | dup_t1 | 500002 | USER TABLE | NULL | NULL | NULL | NULL | 200001 | 1003 | z2 | 11.xxx.xxx.xxx | 36126 | FOLLOWER | READONLY | CLUSTER | | 1002 | test | dup_t1 | 500002 | USER TABLE | NULL | NULL | NULL | NULL | 200001 | 1003 | z3 | 11.xxx.xxx.xxx | 36128 | FOLLOWER | READONLY | CLUSTER | | 1002 | test | dup_t1 | 500002 | USER TABLE | NULL | NULL | NULL | NULL | 200001 | 1003 | z3 | 11.xxx.xxx.xxx | 36129 | FOLLOWER | READONLY | CLUSTER | +-----------+---------------+------------+----------+------------+----------------+-------------------+------------+---------------+-----------+-------+------+----------------+----------+----------+--------------+-----------------+ 6 rows in setInsert and read/write data from 
the replicated table like a regular table. For a read request, if you use a proxy, the read request may be routed to any OBServer node. If you connect directly to an OBServer node, as long as the local replica is readable, the read request will be executed on the connected OBServer node.
obclient> INSERT INTO dup_t1 VALUES(1);
Query OK, 1 row affected

obclient> SELECT * FROM dup_t1;
+------+
| c1   |
+------+
|    1 |
+------+
1 row in set
Create a columnstore table tbl1_cg.
obclient> CREATE TABLE tbl1_cg (col1 INT PRIMARY KEY, col2 VARCHAR(50)) WITH COLUMN GROUP(each column);

Create a table with a columnstore index.
obclient> CREATE TABLE tbl2_cg (col1 INT PRIMARY KEY, col2 INT, col3 INT, INDEX i1 (col2) WITH COLUMN GROUP(each column));

Create a columnstore table with a columnstore index.
obclient> CREATE TABLE tbl3_cg (col1 INT PRIMARY KEY, col2 INT, col3 INT, INDEX i1 (col2) WITH COLUMN GROUP(each column)) WITH COLUMN GROUP(each column);

Specify the Skip Index attribute for a column when creating the table.
obclient> CREATE TABLE test_skidx( col1 INT SKIP_INDEX(MIN_MAX, SUM), col2 FLOAT SKIP_INDEX(MIN_MAX), col3 VARCHAR(1024) SKIP_INDEX(MIN_MAX), col4 CHAR(10) );

Create a table
tbl1 with an integer column col1 and specify that the operation uses 5 parallel threads. Also, populate the new table tbl1 with the query results from table tbl2.
obclient> CREATE /*+ parallel(5) */ TABLE tbl1 (col1 INT) AS SELECT col1 FROM tbl2;

Create a table tb with a persistent bloom filter at the macroblock level.
obclient> CREATE TABLE tb(c1 INT PRIMARY KEY, c2 INT) enable_macro_block_bloom_filter = True;

Create a table tbl1 with a semi-structured encoding type of encoding and a frequency threshold of 90.
obclient> CREATE TABLE tbl1 (col1 INT PRIMARY KEY, col2 INT) SEMISTRUCT_PROPERTIES=(ENCODING_TYPE=ENCODING, FREQ_THRESHOLD=90);

Create a table tb using the new Flat row storage format (version 2).
obclient> CREATE TABLE tb(c1 INT PRIMARY KEY, c2 INT) micro_block_format_version = 2;

Create a table with a data expiration time of 7 days.
obclient(root@mysql001)[infotest]> CREATE TABLE ttl_tbl1( id INT PRIMARY KEY, val VARCHAR(100) ) MERGE_ENGINE = append_only TTL ora_rowscn + INTERVAL 7 DAY BY COMPACTION;

Create a table tbl_e and set its incremental data format to the new encoding format.
obclient> CREATE TABLE tbl_e(col1 INT PRIMARY KEY, col2 INT) DELTA_FORMAT = 'encoding';
Create a table-level STORAGE_CACHE_POLICY
Manually specify hot data.
CREATE TABLE test_table1 (c1 INT, c2 INT) storage_cache_policy (global = "hot");

Specify hot data using a time axis.
CREATE TABLE test_part_func_expr (
  id INT,
  event_time DATETIME NOT NULL,
  data VARCHAR(100))
STORAGE_CACHE_POLICY (BOUNDARY_COLUMN = event_time, HOT_RETENTION = 1 YEAR)
PARTITION BY RANGE COLUMNS(`event_time`) (
  PARTITION p_prev3h VALUES LESS THAN ("2018-01-01 00:00:00"),
  PARTITION p_prev2h VALUES LESS THAN ("2019-01-01 00:00:00"),
  PARTITION p_prev1h VALUES LESS THAN ("2020-01-01 00:00:00"),
  PARTITION p_current VALUES LESS THAN ("2021-01-01 00:00:00"),
  PARTITION p_next1h VALUES LESS THAN ("2022-01-01 00:00:00"),
  PARTITION p_max VALUES LESS THAN MAXVALUE
);
Create a partition-level STORAGE_CACHE_POLICY
Specify the STORAGE_CACHE_POLICY for the primary partitions when creating a partitioned table.
CREATE TABLE tbl3 (col1 INT, col2 INT, col3 INT)
PARTITION BY RANGE(col1)
SUBPARTITION BY KEY(col2, col3) SUBPARTITIONS 5
(PARTITION p0 VALUES LESS THAN(0) STORAGE_CACHE_POLICY = "hot",
 PARTITION p1 VALUES LESS THAN(100) STORAGE_CACHE_POLICY = "auto"
);

Specify the STORAGE_CACHE_POLICY for the subpartitions when creating a partitioned table.
CREATE TABLE tbl4 (col1 INT, col2 INT)
PARTITION BY RANGE(col1)
SUBPARTITION BY RANGE(col2)
(PARTITION p0 VALUES LESS THAN(100)
  (SUBPARTITION sp0 VALUES LESS THAN(2020) STORAGE_CACHE_POLICY = "hot",
   SUBPARTITION sp1 VALUES LESS THAN(2021)),
 PARTITION p1 VALUES LESS THAN(200)
  (SUBPARTITION sp2 VALUES LESS THAN(2020),
   SUBPARTITION sp3 VALUES LESS THAN(2021) STORAGE_CACHE_POLICY = "hot",
   SUBPARTITION sp4 VALUES LESS THAN(2022))
);

Specify the STORAGE_CACHE_POLICY for both the index and the table partitions when creating a table.
CREATE TABLE tbl5 (
  c1 INT,
  c2 VARCHAR(50),
  INDEX idx1 (c1) STORAGE_CACHE_POLICY = (GLOBAL = "hot"),
  INDEX idx2 (c2) STORAGE_CACHE_POLICY = (GLOBAL = "auto")
)
PARTITION BY RANGE (c1)
(PARTITION p1 VALUES LESS THAN (100) STORAGE_CACHE_POLICY = "hot",
 PARTITION p2 VALUES LESS THAN (200) STORAGE_CACHE_POLICY = "auto",
 PARTITION p3 VALUES LESS THAN MAXVALUE STORAGE_CACHE_POLICY = "none");

Example explanation:
- When creating the table tbl5, two indexes are created, each with a different storage cache policy.
- The table is partitioned by the c1 column using range partitioning, with each partition having a different storage cache policy.
- The STORAGE_CACHE_POLICY for the index and the table partitions can be independently set to enable fine-grained storage management.
- When creating the table
