# ./pullrev.sh 1542774 1542765 1542767 http://svn.apache.org/viewvc?view=revision&revision=1542774 http://svn.apache.org/viewvc?view=revision&revision=1542767 http://svn.apache.org/viewvc?view=revision&revision=1542765 --- subversion-1.8.5/subversion/libsvn_wc/upgrade.c.r1542774+ +++ subversion-1.8.5/subversion/libsvn_wc/upgrade.c @@ -1958,6 +1958,10 @@ svn_wc__upgrade_sdb(int *result_format, case SVN_WC__VERSION: /* already upgraded */ *result_format = SVN_WC__VERSION; + + SVN_SQLITE__WITH_LOCK( + svn_wc__db_install_schema_statistics(sdb, scratch_pool), + sdb); } #ifdef SVN_DEBUG --- subversion-1.8.5/subversion/libsvn_wc/wc_db.c.r1542774+ +++ subversion-1.8.5/subversion/libsvn_wc/wc_db.c @@ -1390,6 +1390,15 @@ does_node_exist(svn_boolean_t *exists, return svn_error_trace(svn_sqlite__reset(stmt)); } +svn_error_t * +svn_wc__db_install_schema_statistics(svn_sqlite__db_t *sdb, + apr_pool_t *scratch_pool) +{ + SVN_ERR(svn_sqlite__exec_statements(sdb, STMT_INSTALL_SCHEMA_STATISTICS)); + + return SVN_NO_ERROR; +} + /* Helper for create_db(). Initializes our wc.db schema. */ static svn_error_t * @@ -1417,6 +1426,8 @@ init_db(/* output values */ SVN_ERR(create_repos_id(repos_id, repos_root_url, repos_uuid, db, scratch_pool)); + SVN_ERR(svn_wc__db_install_schema_statistics(db, scratch_pool)); + /* Insert the wcroot. */ /* ### Right now, this just assumes wc metadata is being stored locally. */ SVN_ERR(svn_sqlite__get_statement(&stmt, db, STMT_INSERT_WCROOT)); --- subversion-1.8.5/subversion/libsvn_wc/wc_db.h.r1542774+ +++ subversion-1.8.5/subversion/libsvn_wc/wc_db.h @@ -2831,6 +2831,16 @@ svn_wc__db_scan_deletion(const char **ba @{ */ +/* Installs or updates Sqlite schema statistics for the current (aka latest) + working copy schema. + + This function should be called once on initializing the database and after + an schema update completes */ +svn_error_t * +svn_wc__db_install_schema_statistics(svn_sqlite__db_t *sdb, + apr_pool_t *scratch_pool); + + /* Create a new wc.db file for LOCAL_DIR_ABSPATH, which is going to be a working copy for the repository REPOS_ROOT_URL with uuid REPOS_UUID. Return the raw sqlite handle, repository id and working copy id --- subversion-1.8.5/subversion/libsvn_wc/wc-metadata.sql.r1542774+ +++ subversion-1.8.5/subversion/libsvn_wc/wc-metadata.sql @@ -573,6 +573,54 @@ CREATE UNIQUE INDEX I_EXTERNALS_DEFINED local_relpath); /* ------------------------------------------------------------------------- */ +/* This statement provides SQLite with the necessary information about our + indexes to make better decisions in the query planner. + + For every interesting index this contains a number of rows where the + statistics ar calculated for and then for every column in the index the + average number of rows with the same value in all columns left of this + column including the column itself. + + See http://www.sqlite.org/fileformat2.html#stat1tab for more details. + + The important thing here is that this tells Sqlite that the wc_id column + of the NODES and ACTUAL_NODE table is usually a single value, so queries + should use more than one column for index usage. + + The current hints describe NODES+ACTUAL_NODE as a working copy with + 8000 nodes in 1 a single working copy(=wc_id), 10 nodes per directory + and an average of 2 op-depth layers per node. + + The number of integers must be number of index columns + 1, which is + verified via the test_schema_statistics() test. + */ +-- STMT_INSTALL_SCHEMA_STATISTICS +ANALYZE sqlite_master; /* Creates empty sqlite_stat1 if necessary */ + +INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES + ('NODES', 'sqlite_autoindex_NODES_1', '8000 8000 2 1'); +INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES + ('NODES', 'I_NODES_PARENT', '8000 8000 10 2 1'); +/* Tell a lie: We ignore that 99.9% of all moved_to values are NULL */ +INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES + ('NODES', 'I_NODES_MOVED', '8000 8000 1 1'); + +INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES + ('ACTUAL_NODE', 'sqlite_autoindex_ACTUAL_NODE_1', '8000 8000 1'); +INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES + ('ACTUAL_NODE', 'I_ACTUAL_PARENT', '8000 8000 10 1'); + +INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES + ('LOCK', 'sqlite_autoindex_LOCK_1', '100 100 1'); + +INSERT OR REPLACE INTO sqlite_stat1(tbl, idx, stat) VALUES + ('WC_LOCK', 'sqlite_autoindex_WC_LOCK_1', '100 100 1'); + +/* sqlite_autoindex_WORK_QUEUE_1 doesn't exist because WORK_QUEUE is + a INTEGER PRIMARY KEY AUTOINCREMENT table */ + +ANALYZE sqlite_master; /* Loads sqlite_stat1 data for query optimizer */ +/* ------------------------------------------------------------------------- */ /* Format 20 introduces NODES and removes BASE_NODE and WORKING_NODE */ --- subversion-1.8.5/subversion/tests/libsvn_wc/wc-queries-test.c.r1542774+ +++ subversion-1.8.5/subversion/tests/libsvn_wc/wc-queries-test.c @@ -77,6 +77,7 @@ static const int schema_statements[] = STMT_CREATE_NODES, STMT_CREATE_NODES_TRIGGERS, STMT_CREATE_EXTERNALS, + STMT_INSTALL_SCHEMA_STATISTICS, /* Memory tables */ STMT_CREATE_TARGETS_LIST, STMT_CREATE_CHANGELIST_LIST, @@ -95,8 +96,8 @@ static const int slow_statements[] = /* Operate on the entire WC */ STMT_SELECT_ALL_NODES, /* schema validation code */ - /* Is there a record? ### Can we somehow check for LIMIT 1? */ - STMT_LOOK_FOR_WORK, + /* Updates all records for a repository (designed slow) */ + STMT_UPDATE_LOCK_REPOS_ID, /* Full temporary table read */ STMT_INSERT_ACTUAL_EMPTIES, @@ -114,6 +115,19 @@ static const int slow_statements[] = -1 /* final marker */ }; +/* Statements that just read the first record from a table, + using the primary key. Specialized as different sqlite + versions produce different results */ +static const int primary_key_statements[] = +{ + /* Is there a record? ### Can we somehow check for LIMIT 1, + and primary key instead of adding a list? */ + STMT_LOOK_FOR_WORK, + STMT_SELECT_WORK_ITEM, + + -1 /* final marker */ +}; + /* Helper function to determine if a statement is in a list */ static svn_boolean_t in_list(const int list[], int stmt_idx) @@ -529,6 +543,7 @@ is_node_table(const char *table_name) return (apr_strnatcasecmp(table_name, "nodes") == 0 || apr_strnatcasecmp(table_name, "actual_node") == 0 || apr_strnatcasecmp(table_name, "externals") == 0 + || apr_strnatcasecmp(table_name, "lock") == 0 || apr_strnatcasecmp(table_name, "wc_lock") == 0 || FALSE); } @@ -651,14 +666,24 @@ test_query_expectations(apr_pool_t *scra || (item->expression_vars < 1)) && !is_result_table(item->table)) { - warned = TRUE; - if (!is_slow_statement(i)) - warnings = svn_error_createf(SVN_ERR_TEST_FAILED, warnings, + if (in_list(primary_key_statements, i)) + { + /* Reported as primary key index usage in Sqlite 3.7, + as table scan in 3.8+, while the execution plan is + identical: read first record from table */ + } + else if (!is_slow_statement(i)) + { + warned = TRUE; + warnings = svn_error_createf(SVN_ERR_TEST_FAILED, warnings, "%s: " "Uses %s with only %d index component: (%s)\n%s", wc_query_info[i][0], item->table, item->expression_vars, item->expressions, wc_queries[i]); + } + else + warned = TRUE; } else if (item->search && !item->index) { @@ -718,6 +743,144 @@ test_query_expectations(apr_pool_t *scra return warnings; } +/* Helper to verify a bit of data in the sqlite3 statistics */ +static int +parse_stat_data(const char *stat) +{ + int n = 0; + apr_int64_t last = APR_INT64_MAX; + while (*stat) + { + apr_int64_t v; + char *next; + + if (*stat < '0' || *stat > '9') + return -2; + + errno = 0; + v = apr_strtoi64(stat, &next, 10); + + /* All numbers specify the average number of rows + with the same values in all columns left of it, + so the value must be >= 1 and lower than or equal + to all previous seen numbers */ + if (v <= 0 || (v > last) || (errno != 0)) + return -1; + + last = v; + + n++; + stat = next; + + if (*stat == ' ') + stat++; + } + + return n; +} + +static svn_error_t * +test_schema_statistics(apr_pool_t *scratch_pool) +{ + sqlite3 *sdb; + sqlite3_stmt *stmt; + + SVN_ERR(create_memory_db(&sdb, scratch_pool)); + + SQLITE_ERR( + sqlite3_exec(sdb, + "CREATE TABLE shadow_stat1(tbl TEXT, idx TEXT, stat TEXT)", + NULL, NULL, NULL)); + + SQLITE_ERR( + sqlite3_exec(sdb, + "INSERT INTO shadow_stat1 (tbl, idx, stat) " + "SELECT tbl, idx, stat FROM sqlite_stat1", + NULL, NULL, NULL)); + + SQLITE_ERR( + sqlite3_exec(sdb, + "DROP TABLE sqlite_stat1", + NULL, NULL, NULL)); + + /* Insert statement to give index at least 1 record */ + SQLITE_ERR( + sqlite3_exec(sdb, + "INSERT INTO nodes (wc_id, local_relpath, op_depth," + " presence, kind) " + "VALUES (1, '', 0, 'normal', 'dir')", + NULL, NULL, NULL)); + + SQLITE_ERR( + sqlite3_exec(sdb, + "INSERT INTO actual_node (wc_id, local_relpath) " + "VALUES (1, '')", + NULL, NULL, NULL)); + + SQLITE_ERR( + sqlite3_exec(sdb, + "INSERT INTO lock (repos_id, repos_relpath, lock_token) " + "VALUES (1, '', '')", + NULL, NULL, NULL)); + + /* These are currently not necessary for query optimization, but it's better + to tell Sqlite how we intend to use this table anyway */ + SQLITE_ERR( + sqlite3_exec(sdb, + "INSERT INTO wc_lock (wc_id, local_dir_relpath) " + "VALUES (1, '')", + NULL, NULL, NULL)); + + SQLITE_ERR( + sqlite3_exec(sdb, + "INSERT INTO WORK_QUEUE (work) " + "VALUES ('')", + NULL, NULL, NULL)); + + SQLITE_ERR( + sqlite3_exec(sdb, + "ANALYZE", + NULL, NULL, NULL)); + + SQLITE_ERR( + sqlite3_prepare(sdb, "SELECT s.tbl, s.idx, s.stat, r.stat " + "FROM shadow_stat1 s " + "LEFT JOIN sqlite_stat1 r ON " + "s.tbl=r.tbl and s.idx=r.idx", + -1, &stmt, NULL)); + + while (sqlite3_step(stmt) == SQLITE_ROW) + { + const char *wc_stat = (const char*)sqlite3_column_text(stmt, 2); + const char *sqlite_stat = (const char*)sqlite3_column_text(stmt, 3); + + if (! sqlite_stat) + { + return svn_error_createf(SVN_ERR_TEST_FAILED, NULL, + "Schema statistic failure:" + " Refering to unknown index '%s' on '%s'", + sqlite3_column_text(stmt, 1), + sqlite3_column_text(stmt, 0)); + } + + if (parse_stat_data(wc_stat) != parse_stat_data(sqlite_stat)) + { + return svn_error_createf(SVN_ERR_TEST_FAILED, NULL, + "Schema statistic failure:" + " Column mismatch for '%s' on '%s'", + sqlite3_column_text(stmt, 1), + sqlite3_column_text(stmt, 0)); + } + } + + SQLITE_ERR(sqlite3_reset(stmt)); + SQLITE_ERR(sqlite3_finalize(stmt)); + + SQLITE_ERR(sqlite3_close(sdb)); /* Close the DB if ok; otherwise leaked */ + + return SVN_NO_ERROR; +} + struct svn_test_descriptor_t test_funcs[] = { SVN_TEST_NULL, @@ -729,3 +892,5 @@ struct svn_test_descriptor_t test_funcs[ "test query expectations"), SVN_TEST_NULL }; + SVN_TEST_PASS2(test_schema_statistics, + "test schema statistics"),