Skip to content

Deadlock when inserting data concurrently(批量插入数据时死锁) #17

@Sube-py

Description

@Sube-py

Concurrent size: about 10
Tokenizer: jieba(use custom model, disable trigger)
pg log:

6373::JSONB, $6374::JSONB, $6375, $6376::VARCHAR, $6377, $6378::INTEGER, $6379::INTEGER, nextval('embeddings_id_seq'), $6380::INTEGER, 637), ($6381::BIGINT, $6382::INTEGER[], $6383::JSONB, $6384::JSONB, $6385, $6386::VARCHAR, $6387, $6388::INTEGER, $6389::INTEGER, nextval('embeddings_id_seq'), $6390::INTEGER, 638), ($6391::BIGINT, $6392::INTEGER[], $6393::JSONB, $6394::JSONB, $6395, $6396::VARCHAR, $6397, $6398::INTEGER, $6399::INTEGER, nextval('embeddings_id_seq'), $6400::INTEGER, 639)) AS imp_sen(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, sen_counter) ORDER BY sen_counter RETURNING embeddings.id, embeddings.id AS id__1
2025-07-04 07:18:24.113 UTC [469] ERROR:  canceling statement due to user request
2025-07-04 07:18:24.113 UTC [469] CONTEXT:  SQL statement "
	    WITH 
	    new_tokens AS (
	        SELECT unnest(tokenizer_catalog.apply_text_analyzer_for_custom_model($1.text, 'jieba_analyzer')) AS token
	    ),
	    to_insert AS (
	        SELECT token FROM new_tokens
	        WHERE NOT EXISTS (
	            SELECT 1 FROM tokenizer_catalog."model_jieba_model" WHERE token = new_tokens.token
	        )
	    )
	    INSERT INTO tokenizer_catalog."model_jieba_model" (token) SELECT token FROM to_insert ON CONFLICT (token) DO NOTHING"
	PL/pgSQL function tokenizer_catalog.custom_model_insert_trigger() line 7 at EXECUTE
2025-07-04 07:18:24.113 UTC [469] STATEMENT:  INSERT INTO embeddings (upload_id, pages, rects, content, embedding, text, bm25_embedding, created_at, updated_at, id, deleted_at) SELECT p0::BIGINT, p1::INTEGER[], p2::JSONB, p3::JSONB, p4::VECTOR(1536), p5::VARCHAR, p6::bm25vector, p7::INTEGER, p8::INTEGER, p9::BIGINT, p10::INTEGER FROM (VALUES ($1::BIGINT, $2::INTEGER[], 

file: src/model/custom.rs

pgrx::extension_sql!(
    r#"
CREATE FUNCTION custom_model_insert_trigger()
RETURNS TRIGGER AS $$
DECLARE
    tokenizer_name TEXT := TG_ARGV[0];
    target_column TEXT := TG_ARGV[1];
    text_analyzer TEXT := TG_ARGV[2];
BEGIN
    EXECUTE format('
    WITH 
    new_tokens AS (
        SELECT unnest(tokenizer_catalog.apply_text_analyzer_for_custom_model($1.%I, %L)) AS token
    ),
    to_insert AS (
        SELECT token FROM new_tokens
        WHERE NOT EXISTS (
            SELECT 1 FROM tokenizer_catalog."model_%s" WHERE token = new_tokens.token
        )
    )
    INSERT INTO tokenizer_catalog."model_%s" (token) SELECT token FROM to_insert ON CONFLICT (token) DO NOTHING', target_column, text_analyzer, tokenizer_name, tokenizer_name) USING NEW;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
    "#,
    name = "custom_model_insert_trigger"
);

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions