CREATE TABLE|V4.4.2|OceanBase Database| docs|Distributed Database

CREATE TABLE

Last Updated：2026-04-23 09:19:02 Updated

Purpose

This statement creates a new table in the database.

Syntax

CREATE [hint_options] [TEMPORARY] TABLE [IF NOT EXISTS] table_name
      (table_definition_list) [table_option_list] [partition_option] [[MERGE_ENGINE = {delete_insert | partial_update}] table_column_group_option] [IGNORE | REPLACE] [AS] select;

CREATE TABLE [TEMPORARY] [IF NOT EXISTS] table_name
      LIKE table_name;

table_definition_list:
    table_definition [, table_definition ...]

table_definition:
      column_definition_list
    | [CONSTRAINT [constraint_name]] PRIMARY KEY index_desc
    | [CONSTRAINT [constraint_name]] UNIQUE {INDEX | KEY}
            [index_name] index_desc
    | [CONSTRAINT [constraint_name]] FOREIGN KEY
            [index_name] index_desc
            REFERENCES reference_definition
            [match_action][opt_reference_option_list]
    | [FULLTEXT] {INDEX | KEY} [index_name] [index_type] (key_part,...) [WITH PARSER tokenizer_option] [PARSER_PROPERTIES[=](parser_properties_list)]
      [index_option_list] [index_column_group_option]
    | index_json_clause
    | [CONSTRAINT [constraint_name]] CHECK(expression) constranit_state

column_definition_list:
    column_definition [, column_definition ...]

column_definition:
     column_name data_type
         [DEFAULT const_value] [AUTO_INCREMENT]
         [NULL | NOT NULL] [[PRIMARY] KEY] [UNIQUE [KEY]] [COMMENT string_value] [SKIP_INDEX(skip_index_option_list)]
   | column_name data_type
         [GENERATED ALWAYS] AS (expr) [VIRTUAL | STORED]
         [opt_generated_column_attribute]

skip_index_option_list:
    skip_index_option [,skip_index_option ...]

skip_index_option:
    MIN_MAX
    | SUM

index_desc:
   (column_desc_list) [index_type] [index_option_list]

match_action:
   MATCH {SIMPLE | FULL | PARTIAL}

opt_reference_option_list:
   reference_option [,reference_option ...]

reference_option:
   ON {DELETE | UPDATE} {RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT}

tokenizer_option:
    SPACE
    | NGRAM
    | BENG
    | IK
    | NGRAM2

parser_properties_list:
    parser_properties, [parser_properties]

parser_properties:
    min_token_size = int_value
    | max_token_size = int_value
    | ngram_token_size = int_value
    | ik_mode = 'char_value'
    | min_ngram_size = int_value
    | max_ngram_size = int_value

key_part:
    {index_col_name [(length)] | (expr)} [ASC | DESC]

index_type:
    USING BTREE

index_option_list:
    index_option [ index_option ...]

index_option:
      [GLOBAL | LOCAL]
    | block_size
    | compression
    | STORING(column_name_list)
    | COMMENT string_value
    | STORAGE_CACHE_POLICY(storage_cache_policy_option)

table_option_list:
    table_option [ table_option ...]

table_option:
      [DEFAULT] {CHARSET | CHARACTER SET} [=] charset_name
    | [DEFAULT] COLLATE [=] collation_name
    | table_tablegroup
    | block_size
    | lob_inrow_threshold [=] num
    | compression
    | AUTO_INCREMENT [=] INT_VALUE
    | COMMENT string_value
    | ROW_FORMAT [=] REDUNDANT|COMPACT|DYNAMIC|COMPRESSED|DEFAULT
    | PCTFREE [=] num
    | parallel_clause
    | DUPLICATE_SCOPE [=] 'none|cluster'
    | TABLE_MODE [=] 'table_mode_value'
    | auto_increment_cache_size [=] INT_VALUE
    | READ {ONLY | WRITE}
    | ORGANIZATION [=] {INDEX | HEAP}
    | enable_macro_block_bloom_filter [=] {True | False}
    | DYNAMIC_PARTITION_POLICY [=] (dynamic_partition_policy_list)
    | SEMISTRUCT_ENCODING_TYPE [=] 'encoding' # Deprecated from V4.4.1. Use SEMISTRUCT_PROPERTIES instead.
    | MICRO_BLOCK_FORMAT_VERSION [=] {1|2}
    | STORAGE_CACHE_POLICY (storage_cache_policy_option)

parallel_clause:
    {NOPARALLEL | PARALLEL integer}

table_mode_value:
    NORMAL
    | QUEUING
    | MODERATE
    | SUPER
    | EXTREME

dynamic_partition_policy_list:
    dynamic_partition_policy_option [, dynamic_partition_policy_option ...]

dynamic_partition_policy_option:
    ENABLE = {true | false}
    | TIME_UNIT = {'hour' | 'day' | 'week' | 'month' | 'year'}
    | PRECREATE_TIME = {'-1' | '0' | 'n {hour | day | week | month | year}'}
    | EXPIRE_TIME = {'-1' | '0' | 'n {hour | day | week | month | year}'}
    | TIME_ZONE = {'default' | 'time_zone'}
    | BIGINT_PRECISION = {'none' | 'us' | 'ms' | 's'}

partition_option:
      PARTITION BY HASH(expression)
      [subpartition_option] PARTITIONS partition_count
    | PARTITION BY KEY([column_name_list])
      [subpartition_option] PARTITIONS partition_count
    | PARTITION BY RANGE {(expression) | COLUMNS (column_name_list)}
      [subpartition_option] (range_partition_list) [STORAGE_CACHE_POLICY = {"hot" | "auto" | "none"}]
    | PARTITION BY LIST {(expression) | COLUMNS (column_name_list)}
      [subpartition_option] PARTITIONS partition_count
    | PARTITION BY RANGE [COLUMNS]([column_name_list]) [SIZE('size_value')] (range_partition_list)

subpartition_option:
      SUBPARTITION BY HASH(expression)
      SUBPARTITIONS subpartition_count
    | SUBPARTITION BY KEY(column_name_list)
      SUBPARTITIONS subpartition_count
    | SUBPARTITION BY RANGE {(expression) | COLUMNS (column_name_list)}
      (range_subpartition_list) [STORAGE_CACHE_POLICY = {"hot" | "auto" | "none"}]
    | SUBPARTITION BY LIST(expression)

storage_cache_policy_option:
    GLOBAL = {"hot" | "auto" | "none"}
    | timeline_strategy_list

timeline_strategy_list:
    BOUNDARY_COLUMN = column_name [,BOUNDARY_COLUMN_UNIT = {"s"| "ms"}] ,HOT_RETENTON = intnum retention_time_unit

retention_time_unit:
    YEAR
    | MONTH
    | WEEK
    | DAY
    | HOUR
    | MINUTE
range_partition_list:
    range_partition [, range_partition ...]

range_partition:
    PARTITION partition_name
    VALUES LESS THAN {(expression_list) | MAXVALUE}
    [STORAGE_CACHE_POLICY = {"hot" | "auto" | "none"}]

range_subpartition_list:
    range_subpartition [, range_subpartition ...]

range_subpartition:
    SUBPARTITION subpartition_name
    VALUES LESS THAN {(expression_list) | MAXVALUE}
    [STORAGE_CACHE_POLICY = {"hot" | "auto" | "none"}]

expression_list:
    expression [, expression ...]

column_name_list:
    column_name [, column_name ...]

partition_name_list:
    partition_name [, partition_name ...]

partition_count | subpartition_count:
    INT_VALUE

table_column_group_option/index_column_group_option:
      WITH COLUMN GROUP(all columns)
    | WITH COLUMN GROUP(each column)
    | WITH COLUMN GROUP(all columns, each column)

index_json_clause:
    [UNIQUE] INDEX idx_json_name((CAST(json_column_name->'$.json_field_name' AS UNSIGNED ARRAY)))
    | INDEX idx_json_name(column_name, [column_name, ...] (CAST(json_column_name->'$.json_field_name' AS CHAR(n) ARRAY)))

Parameters

Parameter	Description
hint_options	Optional. Specifies the Hint option. You can manually specify the bypass import Hint, including APPEND, DIRECT, and NO_DIRECT. The corresponding Hint format is `/+ [APPEND \| DIRECT(need_sort,max_error,load_type)] parallel(N) \|NO_DIRECT /`. For more information about bypass import in the CREATE TABLE AS SELECT statement, see the Use the CREATE TABLE AS SELECT statement to bypass import section in Full bypass import.
TEMPORARY	Optional. Indicates that the table is a temporary table. Note For OceanBase Database V4.4.x, this parameter is supported starting from V4.4.2.
IF NOT EXISTS	If you specify `IF NOT EXISTS`, an error will not be returned even if the table to be created already exists. If you do not specify this option and the table to be created already exists, an error will be returned.
IGNORE \| REPLACE	Optional. Specifies how to handle rows with duplicate unique key values when you use the `CREATE TABLE ... SELECT` statement. If you do not specify `IGNORE` or `REPLACE`, an error will be returned when there are rows with duplicate unique key values. `IGNORE`: If a unique key value in the new table conflicts with a row in the copied data, the conflicting row will be ignored and will not be copied to the new table. In other words, the `IGNORE` option skips duplicate data and copies only rows that do not cause unique key conflicts. `REPLACE`: If a row with duplicate unique key values is encountered, the existing row in the new table will be replaced with the new data.
PRIMARY KEY	Specifies the primary key of the created table. If you do not specify this option, a hidden primary key will be used. OceanBase Database allows you to modify the primary key of a table or add a primary key to a table by using the `ALTER TABLE` statement. For more information, see ALTER TABLE.
FOREIGN KEY	Specifies the foreign key of the created table. If you do not specify the foreign key name, the foreign key name will be automatically generated as the table name + `OBFK` + the creation time. (For example, the foreign key name created for the `t1` table on August 1, 2021, at 00:00:00 is `t1_OBFK_1627747200000000`.) A foreign key allows cross-table references to related data. When a `UPDATE` or `DELETE` operation affects the key values in the parent table that match the rows in the child table, the result depends on the `ON UPDATE` and `ON DELETE` clauses: `CASCADE`: Indicates that when a row is deleted or updated in the parent table, the corresponding rows in the child table are automatically deleted or updated. `SET NULL`: Indicates that when a row is deleted or updated in the parent table, the foreign key column in the child table is set to `NULL`. `RESTRICT`: Indicates that the deletion or update operation on the parent table is rejected. `NO ACTION`: Specifies deferred checking. Additionally, the `SET DEFAULT` operation is supported.
FULLTEXT	Optional. Specifies whether to create a full-text index. For more information about creating a full-text index, see the Create a full-text index section in Create an index. Notice Only local full-text indexes are supported in the current version.
WITH PARSER tokenizer_option	Optional. Specifies the tokenizer for the full-text index. For more information, see tokenizer_option below.
PARSER_PROPERTIES[=](parser_properties_list)	Optional. Specifies the properties of the tokenizer. For more information, see parser_properties below.
KEY \| INDEX	Specifies the key or index of the created table. If you do not specify the index name, the index name will be automatically generated as the first column of the index reference. If the index name is duplicated, the index name will be generated as the column name + underscore (_) + sequence number. (For example, if the index created using the `c1` column is duplicated, the index will be named `c1_2`.) You can use the `SHOW INDEX` statement to view the indexes on a table.
key_part	Creates a (function) index.
index_col_name	Specifies the column name of the index. ASC (ascending) is supported after each column name, but DESC (descending) is not supported. The default is ascending. The sorting method for the index is as follows: first, sort by the value of the first column in `index_col_name`; for records with the same value in this column, sort by the value of the next column name; and so on.
expr	Specifies a valid function index expression, which can be a Boolean expression, for example, `c1=c1`. Notice OceanBase Database does not allow you to create function indexes on generated columns in the current version.
ROW_FORMAT	Specifies whether to enable the Encoding storage format for the table. `redundant`: Does not enable the Encoding storage format. `compact`: Does not enable the Encoding storage format. `dynamic`: Enables the Encoding storage format. `compressed`: Enables the Encoding storage format. `default`: Equivalent to `dynamic` mode.
[GENERATED ALWAYS] AS (expr) [VIRTUAL \| STORED]	Creates a generated column. `expr` specifies the expression used to calculate the column value. `VIRTUAL`: The column value is not stored. Instead, it is calculated immediately after any `BEFORE` trigger when a row is read. Virtual columns do not occupy storage space. `STORED`: The column value is evaluated and stored when a row is inserted or updated. Stored columns do occupy storage space and can be indexed.
BLOCK_SIZE	Specifies the microblock size of the table.
lob_inrow_threshold	Specifies the `INROW` threshold for the `LOB` field. If the size of a `LOB` field exceeds this threshold, the excess data will be stored in the `OUTROW` format in the LOB Meta table. The default value is controlled by the variable `ob_default_lob_inrow_threshold`.
COMPRESSION	Specifies the compression algorithm for the table. Valid values: `none`: No compression algorithm is used. `lz4_1.0`: The `lz4` compression algorithm is used. `zstd_1.0`: The `zstd` compression algorithm is used. `snappy_1.0`: The `snappy` compression algorithm is used.
CHARSET \| CHARACTER SET	Specifies the default character set for columns in the table. For more information about the character sets, see Character sets.
COLLATE	Specifies the default collation for columns in the table. For more information about the collations, see Collations.
table_tablegroup	Specifies the `tablegroup` to which the table belongs.
AUTO_INCREMENT	Specifies the initial value for the auto-increment column. OceanBase Database supports using an auto-increment column as a partitioning key.
COMMENT	Comments. Not case-sensitive.
PCTFREE	Specifies the percentage of space reserved for macroblocks.
parallel_clause	Specifies the parallelism level for the table: `NOPARALLEL`: The parallelism level is `1`, which is the default. `PARALLEL integer`: Specifies the parallelism level, where `integer` is greater than or equal to `1`.
DUPLICATE_SCOPE	Specifies the replication attribute of the table. Valid values: `none`: The table is a regular table. This is the default value. `cluster`: The table is a replicated table. The Leader needs to replicate transactions to all F (full-featured) and R (read-only) replicas of the current tenant. OceanBase Database currently only supports replicated tables at the `cluster` level.
CHECK	Restricts the range of values in the column. If you define a `CHECK` constraint for a single column, you can specify the constraint in the column definition and assign a name to it. If you define a `CHECK` constraint for a table, the constraint applies to multiple columns in the table and can appear before the column definitions. When you drop the table, the `CHECK` constraints created in the table are also dropped. You can view the constraint information in the following ways: Use the `SHOW CREATE TABLE` statement. View the `information_schema.TABLE_CONSTRAINTS` view. View the `information_schema.CHECK_CONSTRAINTS` view.
constraint_name	The name of the constraint, which can be up to 64 characters in length. The constraint name can contain spaces at the beginning, middle, and end, but the name must be enclosed in backticks (`). <li> The constraint name can contain the special character "$". <li> If the constraint name is a reserved word, it must be enclosed in backticks. Otherwise, an error will be returned. <li> The names of`CHECK` constraints must be unique within the same database.
expression	The constraint expression. `expression` cannot be empty. The result of `expression` must be a boolean value. `expression` cannot contain columns that do not exist.
MERGE_ENGINE = {delete_insert \| partial_update}	Optional. Specifies the update model for the table. Valid values: `partial_update`: The default value, indicating that the existing update model remains unchanged. `delete_insert`: Indicates that the `merge on write` model is used, where `update` operations are converted into `delete` and `insert` operations, and all columns are written. Note After the `MERGE_ENGINE` parameter is specified when creating a table, its configuration value cannot be modified.
table_column_group_option/index_column_group_option	Specifies the columnar storage options for the table or index. The following table describes the options: `WITH COLUMN GROUP(all columns, each column)`: Specifies to create a redundant rowstore and columnstore table or index. `WITH COLUMN GROUP(all columns)`: Specifies to add a rowstore table or index. `WITH COLUMN GROUP(each column)`: Specifies to create a columnstore table or index.
SKIP_INDEX	Specifies the Skip Index attribute of the column. Valid values: `MIN_MAX`: This is the most commonly used aggregate data type in Skip Index. It stores the maximum and minimum values and the count of null values of the indexed column at the granularity of index nodes. This type of data can accelerate the pushdown of filters and `MIN/MAX` aggregations. `SUM`: This is used to accelerate the pushdown of `SUM` aggregations for numeric types. Notice You cannot create a Skip Index for columns of the JSON or spatial data type. Generated columns cannot have Skip Indexes.
index_json_clause	Specifies the clause for creating a multi-value index. For more information about multi-value indexes, see the Create a multi-value index section in Create an index. Notice Multi-value indexes are currently in the experimental stage. We recommend that you do not use them in a production environment to avoid affecting system stability.
json_column_name	Specifies the name of the `JSON` data type column in the table.
idx_json_name	Specifies the name of the multi-value index to be created.
json_field_name	Specifies the name of the field in the JSON column to be indexed.
TABLE_MODE	Optional. Specifies the merge trigger threshold and merge strategy, which control the merge behavior after data is dumped. For more information about the values, see table_mode_value below.
auto_increment_cache_size	Optional. Specifies the number of cached auto-increment values. The default value is 0, which indicates that this parameter is not configured. In this case, the system uses the tenant-level parameter auto_increment_cache_size as the cache size for auto-increment columns.
READ {ONLY \| WRITE}	Specifies the read/write permissions for the table. Valid values: `READ ONLY`: indicates that the table can be read but cannot be modified or deleted. Notice The `READ ONLY` parameter at the table level does not take effect for users with the `SUPER` privilege. It only takes effect for ordinary users. `READ WRITE`: default value. Indicates that the table can be read, modified, and deleted.
ORGANIZATION [=] {INDEX \| HEAP}	Optional. Specifies the storage order of data rows in the table, that is, the table organization mode. Valid values: `INDEX`: specifies an index-organized table. `HEAP`: specifies a heap-organized table. If this option is not specified, its value is the same as the value of the default_table_organization parameter.
enable_macro_block_bloom_filter [=] {True \| False}	Specifies whether to persist the bloom filter at the macroblock level. Valid values: `True`: enables persistence of the bloom filter at the macroblock level. `False`: disables persistence of the bloom filter at the macroblock level.
DYNAMIC_PARTITION_POLICY [=] (dynamic_partition_policy_list)	Specifies the dynamic partition management attribute of the table to automatically create and delete partitions. `dynamic_partition_policy_list` specifies the configurable parameters of the dynamic partition strategy. Parameters are separated by commas. For more information, see dynamic_partition_policy_option below.
PARTITION BY RANGE [COLUMNS]([column_name_list]) [SIZE('size_value')]	Specifies the clause for creating an automatic partition table. For more information, see the syntax for creating an automatic partition table in Automatic partition splitting. Note For V4.4.2, the lower limit of the automatic partition splitting threshold has been adjusted from 128 MB to 1 MB starting from V4.4.2 BP1.
SEMISTRUCT_ENCODING_TYPE	Optional. Specifies the encoding type for semi-structured data. Valid values: `''`: disables semi-structured encoding. `'encoding'`: enables semi-structured encoding. Notice Starting from V4.4.1, `semistruct_encoding_type` is deprecated. Instead, use `semistruct_properties`.
SEMISTRUCT_PROPERTIES	Optional. Specifies the encoding type for semi-structured data in key-value pairs. Valid values: `encoding_type`: specifies the encoding type for semi-structured data. Valid values: `encoding` or `none`. Default value: `none`. `freq_threshold`: specifies the frequency threshold. Valid value range: `(0, 100]`. Default value: `90`. Paths in JSON objects that appear with a frequency higher than this threshold are stored as independent subcolumns. These subcolumns are called frequent columns. A lower `freq_threshold` value increases the number of frequent columns, thereby improving the compression ratio, but also increases the query performance overhead. For more information about how to use semi-structured encoding, see Use semi-structured encoding. Notice This parameter is supported starting from V4.4.1.
MICRO_BLOCK_FORMAT_VERSION	Optional. Specifies the microblock storage format version of the table. Valid values: [1, +∞). Valid values: `1`: indicates that the microblock storage format is the same as the existing one. `2`: indicates that the new flat row storage format is enabled. Other values: return an illegal error. Note This parameter is introduced starting from V4.4.1.
STORAGE_CACHE_POLICY	Optional. Specifies the hot cache policy for tables, partitions, and indexes in shared storage mode. If set, the specified data is cached from object storage to the local cloud disk. Otherwise, the system will adaptively cache the data. For more information, see storage_cache_policy.

table_mode_value

Note

The following TABLE_MODE modes, except for the NORMAL mode, represent QUEUING tables. The QUEUING table is the most basic table type. The following modes, except for the NORMAL mode, represent more aggressive compaction strategies.

NORMAL: The default value. This mode indicates normal. In this mode, the probability of triggering a major compaction after a data dump is very low.
QUEUING: In this mode, the probability of triggering a major compaction after a data dump is low.
MODERATE: This mode indicates moderate. In this mode, the probability of triggering a major compaction after a data dump is moderate.
SUPER: This mode indicates super. In this mode, the probability of triggering a major compaction after a data dump is high.
EXTREME: This mode indicates extreme. In this mode, the probability of triggering a major compaction after a data dump is high.

For more information about compaction, see Adaptive compaction.

tokenizer_option

SPACE: The default value. This option indicates that the tokenizer splits data by space. You can specify the following parameters:

Parameter

Value range

min_token_size [1, 16]

max_token_size [10, 84]
NGRAM: This option indicates that the tokenizer splits data by N-Grams (Chinese). You can specify the following parameters:

Parameter

Value range

ngram_token_size [1, 10]
NGRAM2: This option indicates that the tokenizer splits data into consecutive characters in the range of min_ngram_size to max_ngram_size. You can specify the following parameters:

Parameter

Value range

min_ngram_size [1, 16]

max_ngram_size [1, 16]
BENG: This option indicates that the tokenizer splits data by Beng (basic English). You can specify the following parameters:

Parameter

Value range

min_token_size [1, 16]

max_token_size [10, 84]
IK: This option indicates that the tokenizer splits data by IK (Chinese). Currently, only the utf-8 character set is supported. You can specify the following parameter:
Parameter

Value range
ik_mode

smart

max_word

Parameter	Value range
min_token_size	[1, 16]
max_token_size	[10, 84]

Parameter	Value range
ngram_token_size	[1, 10]

Parameter	Value range
min_ngram_size	[1, 16]
max_ngram_size	[1, 16]

Parameter	Value range
min_token_size	[1, 16]
max_token_size	[10, 84]

Parameter	Value range
ik_mode	`smart` `max_word`

parser_properties

min_token_size: The minimum token length. The default value is 3. Value range: 1 to 16.
max_token_size: The maximum token length. The default value is 84. Value range: 10 to 84.
ngram_token_size: The token length for the NGRAM tokenizer. This parameter is valid only for the NGRAM tokenizer. The default value is 2. Value range: 1 to 10.
ik_mode: The tokenization mode for the IK tokenizer. Valid values:
- smart: The default value. This mode uses the dictionary to improve the accuracy of tokenization. The boundaries of the dictionary words are prioritized, which may reduce unnecessary expansions.
- max_word: This mode recognizes the words defined in the dictionary but does not affect the maximum expansion of tokenization. Even if the dictionary defines the words, the max_word mode still tries to split the text into more words.
min_ngram_size: The minimum token length. Value range: [1,16].
max_ngram_size: The maximum token length. Value range: [1,16].

Here is an example:

CREATE TABLE tbl1 (col1 VARCHAR(200), col2 TEXT,
    FULLTEXT INDEX fidx(col2) WITH PARSER ik PARSER_PROPERTIES=(ik_mode='max_word'));

dynamic_partition_policy_option

ENABLE = {true | false}: Optional. Specifies whether to enable dynamic partition management. This parameter can be modified. Valid values:
- true: Default value. Indicates that dynamic partition management is enabled.
- false: Indicates that dynamic partition management is disabled.
TIME_UNIT = {'hour' | 'day' | 'week' | 'month' | 'year'}: Required. Specifies the time unit for partitioning, that is, the interval for automatically creating partition boundaries. This parameter cannot be modified. Valid values:
- hour: Partitions are created by hour.
- day: Partitions are created by day.
- week: Partitions are created by week.
- month: Partitions are created by month.
- year: Partitions are created by year.
PRECREATE_TIME = {'-1' | '0' | 'n {hour | day | week | month | year}'}: Optional. Specifies the time for pre-creating partitions. When dynamic partition management is scheduled, partitions are pre-created so that the upper bound of the maximum partition > now() + precreate_time. This parameter can be modified. Valid values:
- -1: Default value. Indicates that no partitions are pre-created.
- 0: Indicates that only the current partition is pre-created.
- n {hour | day | week | month | year}: Indicates the time span for pre-creating partitions. For example, 3 hour indicates that partitions are pre-created for 3 hours.
Note
- When multiple partitions are to be pre-created, the partition boundaries are spaced by TIME_UNIT.
- The boundary of the first pre-created partition is the ceiling of the existing maximum partition boundary based on TIME_UNIT.
EXPIRE_TIME = {'-1' | '0' | 'n {hour | day | week | month | year}'}: Optional. Specifies the expiration time of partitions. When dynamic partition management is scheduled, all partitions with upper bounds < now() - expire_time are deleted. This parameter can be modified. Valid values:
- -1: Default value. Indicates that partitions never expire.
- 0: Indicates that all partitions except the current one have expired.
- n {hour | day | week | month | year}: Indicates the expiration time of partitions. For example, 1 day indicates that partitions expire after 1 day.
TIME_ZONE = {'default' | 'time_zone'}: Optional. Specifies the time zone information for determining the current time and the partition key values of the date, datetime, and year types. This parameter cannot be modified. Valid values:
- default: Default value. Indicates that no time zone is specified and the tenant time zone is used. For other types, the time_zone field must be default.
- time_zone: Indicates a custom time zone offset. For example, +8:00.
BIGINT_PRECISION = {'none'| 'ms' | 's'}: Optional. Specifies the timestamp precision of the bigint type partition key. This parameter cannot be modified. Valid values:
- none: Default value. Indicates no precision (the partition key is not of the bigint type).
- ms: Indicates millisecond precision.
- s: Indicates second precision.

For more information about how to create a dynamic partition table, see Create a dynamic partition table.

Here is an example:

CREATE TABLE tbl2 (col1 INT, col2 DATETIME)
    DYNAMIC_PARTITION_POLICY(
        ENABLE = true,
        TIME_UNIT = 'hour',
        PRECREATE_TIME = '3 hour',
        EXPIRE_TIME = '1 day',
        TIME_ZONE = '+8:00',
        BIGINT_PRECISION = 'none')
    PARTITION BY RANGE COLUMNS (col2)(
        PARTITION P0 VALUES LESS THAN ('2024-11-11 13:30:00')
        );

storage_cache_policy

Create a table-level storage_cache_policy

STORAGE_CACHE_POLICY is an optional parameter. If you do not specify the STORAGE_CACHE_POLICY option, the value of the tenant-level configuration item default_storage_cache_policy is used by default. The properties in storage_cache_policy_option are defined in the key-value format. The semantics of each property are as follows:

GLOBAL = {"hot"|"auto"|"none"}: specifies the hot cache strategy for the entire table. Valid values:
- hot: specifies that all data in the table is hot data. If the cache disk space is sufficient, all data in the table will be cached to the local cache disk.
- auto: specifies that the hot data in the table is automatically identified by the system.
- none: specifies that the strategy for this index follows the value of the STORAGE_CACHE_POLICY option of the parent table.
  
  Notice
  
  The none attribute can only be used for indexes.
timeline_strategy_list: specifies the list of time-axis strategy parameters. Parameters are separated by commas. The hot cache time-axis strategy supports a mechanism to determine whether partition data is hot data based on time. The system automatically adjusts the partition data on the local cache disk based on the configured strategy.
Note

A time-axis is a partitioning range defined by the Range partitioning method. The hot data in the partition is cached based on the partitioning range. When a certain time condition is met, the data in the partition is determined to be hot data. When you use the time-axis strategy, note the following:
- Only Range-partitioned tables are supported. Range partitioning can be at the primary or secondary level. This is because time is used to determine whether data has expired, but double Range partitioning is not supported. This is because it is not possible to determine which Range partition's time to use.
- When you use the time-axis strategy, the partitioning expression can contain only column names. Expressions are not supported. For example, PARTITION BY RANGE COLUMNS(expr(col3)) is not supported.
- BOUNDARY_COLUMN must be a partitioning key. If the partitioning key consists of multiple columns, BOUNDARY_COLUMN must be the first column, which is used to determine whether the partition has expired.
BOUNDARY_COLUMN = column_name: specifies the column used to determine hot data. Integer types (BIGINT or INT, in the Unix timestamp format) and time types (TIMESTAMP, DATE, DATETIME, or YEAR) are supported.
- If the BOUNDARY_COLUMN is of an integer type, the table can be partitioned by Range or Range Columns.
- If the BOUNDARY_COLUMN is of a time type, the table can only be partitioned by Range Columns.
- BOUNDARY_COLUMN_UNIT = {"s" | "ms"}: specifies the time unit of the BOUNDARY_COLUMN parameter. This parameter is supported only when the BOUNDARY_COLUMN is of an integer type. When the integer value is used as a timestamp, you must specify the timestamp unit. Otherwise, an error timestamp may be incorrectly parsed. Valid values:
  - If the partitioning column is of the INT type, the BOUNDARY_COLUMN_UNIT can only be s.
  - If the partitioning column is of the BIGINT type, the BOUNDARY_COLUMN_UNIT can be s or ms.
    
    Notice
    
    If the format is not a Unix timestamp, the time represented by the INT type cannot be correctly identified.
HOT_RETENTION = intnum retention_time_unit: specifies the time range for hot data.
- intnum: specifies an integer.
- retention_time_unit: specifies the time unit. Valid values:
  - YEAR: specifies years.
  - MONTH: specifies months.
  - WEEK: specifies weeks.
  - DAY: specifies days.
  - HOUR: specifies hours.
  - MINUTE: specifies minutes.

Create a partition-level storage_cache_policy

The STORAGE_CACHE_POLICY option at the partition level is optional. Valid values:

hot: specifies that all data in the partition is hot data. If the cache disk space is sufficient, all data in the partition will be cached to the local cache disk.
auto: specifies that the hot data in the partition is automatically identified by the system.
none: the default value. Specifies that the hot cache strategy for the partition follows the value of the STORAGE_CACHE_POLICY option at the upper level.

Examples

Create a database table.

obclient> CREATE TABLE tbl1 (c1 INT PRIMARY KEY, c2 VARCHAR(50));
Query OK, 0 rows affected

Create a table with an index.

obclient> CREATE TABLE tbl2 (c1 INT PRIMARY KEY, c2 INT, c3 INT, INDEX i1 (c2));
Query OK, 0 rows affected

Create a table tbl3 with a function index.

obclient> CREATE TABLE tbl3 (c1 INT, c2 INT, INDEX i1 ((c1+1)), UNIQUE KEY ((c1+c2)));
Query OK, 0 rows affected

obclient> SHOW CREATE TABLE tbl3;
+-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table                                                                                                                                                                                                                                                                                                                                                                     |
+-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| tbl3    | CREATE TABLE `tbl3` (
`c1` int(11) DEFAULT NULL,
`c2` int(11) DEFAULT NULL,
UNIQUE KEY `functional_index` ((`c1` + `c2`)) BLOCK_SIZE 16384 LOCAL,
KEY `i1` ((`c1` + 1)) BLOCK_SIZE 16384 LOCAL
) DEFAULT CHARSET = utf8mb4 ROW_FORMAT = DYNAMIC COMPRESSION = 'zstd_1.3.8' REPLICA_NUM = 2 BLOCK_SIZE = 16384 USE_BLOOM_FILTER = FALSE TABLET_SIZE = 134217728 PCTFREE = 0 |
+-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

Create a table with 8 hash partitions.

obclient> CREATE TABLE tbl4 (c1 INT PRIMARY KEY, c2 INT) PARTITION BY HASH(c1) PARTITIONS 8;
Query OK, 0 rows affected

Create a table with range partitions as the primary partitions and key partitions as the subpartitions.

obclient> CREATE TABLE tbl5 (c1 INT, c2 INT, c3 INT) PARTITION BY RANGE(c1)
     SUBPARTITION BY KEY(c2, c3) SUBPARTITIONS 5
     (PARTITION p0 VALUES LESS THAN(0), PARTITION p1 VALUES LESS THAN(100));
Query OK, 0 rows affected

Create a table with one column of type gbk and one column of type utf8.

obclient> CREATE TABLE tbl6 (c1 VARCHAR(10),
                 c2 VARCHAR(10) CHARSET GBK COLLATE gbk_bin)
             DEFAULT CHARSET utf8 COLLATE utf8mb4_general_ci;
Query OK, 0 rows affected

Enable encoding and use zstd compression. Set the reserved space for macroblocks to 5%.

obclient> CREATE TABLE tbl7 (c1 INT, c2 INT, c3 VARCHAR(64))
      COMPRESSION 'zstd_1.0'
      ROW_FORMAT DYNAMIC
      PCTFREE 5;
Query OK, 0 rows affected

Create a table tbl8 and set the parallelism to 3.

obclient> CREATE TABLE tbl8(c1 INT PRIMARY KEY, c2 INT) PARALLEL 3;
Query OK, 0 rows affected

Use an auto-increment column as the partitioning key.

obclient> CREATE TABLE tbl9(inv_id BIGINT NOT NULL AUTO_INCREMENT,c1 BIGINT,
PRIMARY KEY (inv_id) ) PARTITION BY HASH(inv_id) PARTITIONS 8;
Query OK, 0 rows affected

Specify a foreign key for the ref_t2 table. When an UPDATE operation affects the key values in the parent table that match the rows in the child table, execute the SET NULL operation.

obclient> CREATE TABLE ref_t1(c1 INT PRIMARY KEY, C2 INT);
Query OK, 0 rows affected

obclient> CREATE TABLE ref_t2(c1 INT PRIMARY KEY, C2 INT,FOREIGN KEY(c2) REFERENCES ref_t1(c1) ON UPDATE SET NULL);
Query OK, 0 rows affected

Create a table tbl10 with a CHECK constraint and view the constraint information.

obclient> CREATE TABLE tbl10 (col1 INT, col2 INT, col3 INT, CONSTRAINT equal_check1 CHECK(col1 = col3 * 2));
Query OK, 0 rows affected

obclient> SHOW CREATE TABLE tbl10;
+-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table                                                                                                                                                                                                                                                                                                                                               |
+-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| tbl10  | CREATE TABLE `tbl10` (
  `col1` int(11) DEFAULT NULL,
  `col2` int(11) DEFAULT NULL,
  `col3` int(11) DEFAULT NULL,
  CONSTRAINT `equal_check1` CHECK ((`col1` = (`col3` * 2)))
) DEFAULT CHARSET = utf8mb4 ROW_FORMAT = DYNAMIC COMPRESSION = 'zstd_1.3.8' REPLICA_NUM = 1 BLOCK_SIZE = 16384 USE_BLOOM_FILTER = FALSE TABLET_SIZE = 134217728 PCTFREE = 0 |
+-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set

Create a table tbl11 based on the tbl10 table and view the table information.

obclient> CREATE TABLE tbl11 LIKE tbl10;
Query OK, 0 rows affected

obclient> SHOW CREATE TABLE tbl11;
+-------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table                                                                                                                                                                                                                                                                                                                                                                |
+-------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| tbl11  | CREATE TABLE `tbl11` (
  `col1` int(11) DEFAULT NULL,
  `col2` int(11) DEFAULT NULL,
  `col3` int(11) DEFAULT NULL,
  CONSTRAINT `tbl11_OBCHECK_1650793233327894` CHECK ((`col1` = (`col3` * 2)))
) DEFAULT CHARSET = utf8mb4 ROW_FORMAT = DYNAMIC COMPRESSION = 'zstd_1.3.8' REPLICA_NUM = 1 BLOCK_SIZE = 16384 USE_BLOOM_FILTER = FALSE TABLET_SIZE = 134217728 PCTFREE = 0 |
+-------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set

Create a cluster-level replicated table dup_t1.

obclient> CREATE RESOURCE UNIT IF NOT EXISTS 2c5g MAX_CPU 2, MEMORY_SIZE '5G';
Query OK, 0 rows affected

Create a resource pool with a unit number of 2.

obclient> CREATE RESOURCE POOL tenant_pool UNIT = '2c5g', UNIT_NUM = 2, ZONE_LIST = ('z1', 'z2', 'z3');
Query OK, 0 rows affected

Create a user tenant obmysql and specify the locality distribution.

obclient> CREATE TENANT obmysql resource_pool_list=('tenant_pool'), LOCALITY = "F@z1, F@z2, R@z3", PRIMARY_ZONE = "z1" SET ob_tcp_invited_nodes='%';
Query OK, 0 rows affected

obclient> USE test;
Database changed
obclient> CREATE TABLE dup_t1(c1 INT) DUPLICATE_SCOPE = 'cluster';
Query OK, 0 rows affected

(Optional) View the broadcast log stream information. The replicated table will be created on this log stream.

obclient> SELECT * FROM oceanbase.DBA_OB_LS WHERE FLAG LIKE "%DUPLICATE%";
+-------+--------+--------------+---------------+-------------+---------------------+----------+---------------------+---------------------+-----------+-----------+
| LS_ID | STATUS | PRIMARY_ZONE | UNIT_GROUP_ID | LS_GROUP_ID | CREATE_SCN          | DROP_SCN | SYNC_SCN            | READABLE_SCN        | FLAG      | UNIT_LIST |
+-------+--------+--------------+---------------+-------------+---------------------+----------+---------------------+---------------------+-----------+-----------+
|  1003 | NORMAL | z1;z2        |             0 |           0 | 1683267390195713284 |     NULL | 1683337744205408139 | 1683337744205408139 | DUPLICATE |           |
+-------+--------+--------------+---------------+-------------+---------------------+----------+---------------------+---------------------+-----------+-----------+
1 row in set

(Optional) View the replica distribution of the replicated table under the sys tenant. The REPLICA_TYPE field indicates the replica type.

obclient> SELECT * FROM oceanbase.CDB_OB_TABLE_LOCATIONS WHERE TABLE_NAME = "dup_t1";
+-----------+---------------+------------+----------+------------+----------------+-------------------+------------+---------------+-----------+-------+------+----------------+----------+----------+--------------+-----------------+
| TENANT_ID | DATABASE_NAME | TABLE_NAME | TABLE_ID | TABLE_TYPE | PARTITION_NAME | SUBPARTITION_NAME | INDEX_NAME | DATA_TABLE_ID | TABLET_ID | LS_ID | ZONE | SVR_IP         | SVR_PORT | ROLE     | REPLICA_TYPE | DUPLICATE_SCOPE |
+-----------+---------------+------------+----------+------------+----------------+-------------------+------------+---------------+-----------+-------+------+----------------+----------+----------+--------------+-----------------+
|      1002 | test          | dup_t1     |   500002 | USER TABLE | NULL           | NULL              | NULL       |          NULL |    200001 |  1003 | z1   | 11.xxx.xxx.xxx |    36125 | LEADER   | FULL         | CLUSTER         |
|      1002 | test          | dup_t1     |   500002 | USER TABLE | NULL           | NULL              | NULL       |          NULL |    200001 |  1003 | z1   | 11.xxx.xxx.xxx  |    36124 | FOLLOWER | READONLY     | CLUSTER         |
|      1002 | test          | dup_t1     |   500002 | USER TABLE | NULL           | NULL              | NULL       |          NULL |    200001 |  1003 | z2   | 11.xxx.xxx.xxx |    36127 | FOLLOWER | FULL         | CLUSTER         |
|      1002 | test          | dup_t1     |   500002 | USER TABLE | NULL           | NULL              | NULL       |          NULL |    200001 |  1003 | z2   | 11.xxx.xxx.xxx |    36126 | FOLLOWER | READONLY     | CLUSTER         |
|      1002 | test          | dup_t1     |   500002 | USER TABLE | NULL           | NULL              | NULL       |          NULL |    200001 |  1003 | z3   | 11.xxx.xxx.xxx |    36128 | FOLLOWER | READONLY     | CLUSTER         |
|      1002 | test          | dup_t1     |   500002 | USER TABLE | NULL           | NULL              | NULL       |          NULL |    200001 |  1003 | z3   | 11.xxx.xxx.xxx  |    36129 | FOLLOWER | READONLY     | CLUSTER         |
+-----------+---------------+------------+----------+------------+----------------+-------------------+------------+---------------+-----------+-------+------+----------------+----------+----------+--------------+-----------------+
6 rows in set

Insert and read/write data in the replicated table like a regular table. For a read request, if you use a proxy, the read request may be routed to any OBServer node. If you connect directly to an OBServer node, the read request will be executed on the connected OBServer node as long as the local replica is readable.
```
obclient> INSERT INTO dup_t1 VALUES(1);
Query OK, 1 row affected

obclient> SELECT * FROM dup_t1;
+------+
| c1   |
+------+
|    1 |
+------+
1 row in set
```

Create a columnstore table tbl1_cg.

obclient> CREATE TABLE tbl1_cg (col1 INT PRIMARY KEY, col2 VARCHAR(50)) WITH COLUMN GROUP(each column);

Create a table with a columnstore index.

obclient> CREATE TABLE tbl2_cg (col1 INT PRIMARY KEY, col2 INT, col3 INT, INDEX i1 (col2) WITH COLUMN GROUP(each column));

Create a columnstore table with a columnstore index.

obclient> CREATE TABLE tbl3_cg (col1 INT PRIMARY KEY, col2 INT, col3 INT, INDEX i1 (col2) WITH COLUMN GROUP(each column)) WITH COLUMN GROUP(each column);

Specify the Skip Index attribute for a column when creating a table.

obclient> CREATE TABLE test_skidx(
  col1 INT SKIP_INDEX(MIN_MAX, SUM),
  col2 FLOAT SKIP_INDEX(MIN_MAX),
  col3 VARCHAR(1024) SKIP_INDEX(MIN_MAX),
  col4 CHAR(10)
);

Create a table tbl1 with an integer column col1 and specify that the operation uses 5 parallel threads. Also, specify that the data content of the new table tbl1 will be sourced from the query results of table tbl2.
```
obclient> CREATE /*+ parallel(5) */ TABLE tbl1 (col1 INT) AS SELECT col1 FROM tbl2;
```

Create a table tb with a persistent bloom filter at the macroblock level.

obclient> CREATE TABLE tb(c1 INT PRIMARY KEY, c2 INT) enable_macro_block_bloom_filter = True;

Create a table tbl1 and specify the semi-structured encoding type as encoding and the frequency threshold as 90.

obclient> CREATE TABLE tbl1 (col1 INT PRIMARY KEY, col2 INT) SEMISTRUCT_PROPERTIES=(ENCODING_TYPE=ENCODING, FREQ_THRESHOLD=90);

Create a table tb and enable the new flat row storage format (version 2).

obclient> CREATE TABLE tb(c1 INT PRIMARY KEY, c2 INT) micro_block_format_version = 2;

Create a table-level STORAGE_CACHE_POLICY

Manually specify hot data.

CREATE TABLE test_table1 (c1 INT, c2 INT) storage_cache_policy (global = "hot");

Specify hot data using the time axis method.

CREATE TABLE test_part_func_expr (
id INT,
event_time DATETIME NOT NULL,
data VARCHAR(100))
STORAGE_CACHE_POLICY (
  BOUNDARY_COLUMN = event_time,
  HOT_RETENTION =1 YEAR
)
PARTITION BY RANGE COLUMNS(`event_time`) (
  PARTITION p_prev3h VALUES LESS THAN ("2018-01-01 00:00:00"), 
  PARTITION p_prev2h VALUES LESS THAN ("2019-01-01 00:00:00"), 
  PARTITION p_prev1h VALUES LESS THAN ("2020-01-01 00:00:00"), 
  PARTITION p_current VALUES LESS THAN ("2021-01-01 00:00:00"), 
  PARTITION p_next1h VALUES LESS THAN ("2022-01-01 00:00:00"), 
  PARTITION p_max VALUES LESS THAN MAXVALUE 
);

Create a partition-level STORAGE_CACHE_POLICY

Specify the STORAGE_CACHE_POLICY for the primary partitions when creating a partitioned table.

CREATE TABLE tbl3 (col1 INT, col2 INT, col3 INT)
  PARTITION BY RANGE(col1)
  SUBPARTITION BY KEY(col2, col3) SUBPARTITIONS 5
    (PARTITION p0 VALUES LESS THAN(0) STORAGE_CACHE_POLICY = "hot",
     PARTITION p1 VALUES LESS THAN(100) STORAGE_CACHE_POLICY = "auto"
    );

Specify the STORAGE_CACHE_POLICY for the subpartitions when creating a partitioned table.

CREATE TABLE tbl4 (col1 INT, col2 INT)
  PARTITION BY RANGE(col1)
  SUBPARTITION BY RANGE(col2)
    (PARTITION p0 VALUES LESS THAN(100)
        (SUBPARTITION sp0 VALUES LESS THAN(2020) STORAGE_CACHE_POLICY = "hot",
         SUBPARTITION sp1 VALUES LESS THAN(2021)),
    PARTITION p1 VALUES LESS THAN(200)
        (SUBPARTITION sp2 VALUES LESS THAN(2020),
         SUBPARTITION sp3 VALUES LESS THAN(2021) STORAGE_CACHE_POLICY = "hot",
         SUBPARTITION sp4 VALUES LESS THAN(2022))
    );

Specify the STORAGE_CACHE_POLICY for both the index and the table partitions when creating a table.
```
CREATE TABLE tbl5 (
c1 INT,
c2 VARCHAR(50),
INDEX idx1 (c1) STORAGE_CACHE_POLICY = (GLOBAL = "hot"),
INDEX idx2 (c2) STORAGE_CACHE_POLICY = (GLOBAL = "auto")
) PARTITION BY RANGE (c1)
(PARTITION p1 VALUES LESS THAN (100) STORAGE_CACHE_POLICY = "hot",
PARTITION p2 VALUES LESS THAN (200) STORAGE_CACHE_POLICY = "auto",
PARTITION p3 VALUES LESS THAN MAXVALUE STORAGE_CACHE_POLICY = "none");
```
Example explanation:
- When creating the table tbl5, two indexes are created, and each index specifies a different storage cache policy.
- The table is partitioned by the c1 column using range partitioning, and each partition specifies a different storage cache policy.
- The STORAGE_CACHE_POLICY for the index and the table partitions can be set independently, enabling fine-grained storage management.

OceanBase

Customer Stories

Documentation

CREATE TABLE

Purpose

Syntax

Parameters

Note

Notice

Notice

Note

Notice

Notice

Notice

Note

Notice

Notice

Note

table_mode_value

Note

tokenizer_option

parser_properties

dynamic_partition_policy_option

Note

storage_cache_policy

Create a table-level storage_cache_policy

Notice

Note

Notice

Create a partition-level storage_cache_policy

Examples

Create a table-level STORAGE_CACHE_POLICY

Create a partition-level STORAGE_CACHE_POLICY

References