package sql
import "github.com/cyralinc/dmap/sql"
Package sql provides mechanisms to perform database introspection, sampling, and classification on various SQL data repositories. The Repository interface provides the API for performing database introspection and sampling. It encapsulates the concept of a Dmap data SQL repository. All out-of-the-box Repository implementations are included in their own files named after the repository type, e.g. mysql.go, postgres.go, etc.
Registry provides an API for registering and constructing Repository implementations within an application. There is a global DefaultRegistry which has all out-of-the-box Repository implementations registered to it by default.
Scanner is a scan.RepoScanner implementation that can be used to perform data discovery and classification on SQL repositories.
Index
- Constants
- Variables
- func MustRegister(repoType string, constructor RepoConstructor)
- func Register(repoType string, constructor RepoConstructor) error
- func Unregister(repoType string)
- type AttributeMetadata
-
type DenodoRepository
- func NewDenodoRepository(cfg RepoConfig) (*DenodoRepository, error)
- func (r *DenodoRepository) Close() error
- func (r *DenodoRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
- func (r *DenodoRepository) ListDatabases(_ context.Context) ([]string, error)
- func (r *DenodoRepository) Ping(ctx context.Context) error
- func (r *DenodoRepository) SampleTable(ctx context.Context, params SampleParameters) (Sample, error)
-
type GenericRepository
- func NewGenericRepository(repoType, database, connStr string, maxOpenConns uint) (*GenericRepository, error)
- func NewGenericRepositoryFromDB(repoType, database string, db *sql.DB) *GenericRepository
- func (r *GenericRepository) Close() error
- func (r *GenericRepository) GetDb() *sql.DB
- func (r *GenericRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
- func (r *GenericRepository) IntrospectWithQuery(ctx context.Context, query string, params IntrospectParameters) (*Metadata, error)
- func (r *GenericRepository) ListDatabases(_ context.Context) ([]string, error)
- func (r *GenericRepository) ListDatabasesWithQuery(ctx context.Context, query string, params ...any) ([]string, error)
- func (r *GenericRepository) Ping(ctx context.Context) error
- func (r *GenericRepository) SampleTable(ctx context.Context, params SampleParameters) (Sample, error)
- func (r *GenericRepository) SampleTableWithQuery(ctx context.Context, query string, params SampleParameters) (Sample, error)
- type IntrospectParameters
- type Metadata
-
type MySqlRepository
- func NewMySqlRepository(cfg RepoConfig) (*MySqlRepository, error)
- func (r *MySqlRepository) Close() error
- func (r *MySqlRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
- func (r *MySqlRepository) ListDatabases(ctx context.Context) ([]string, error)
- func (r *MySqlRepository) Ping(ctx context.Context) error
- func (r *MySqlRepository) SampleTable(ctx context.Context, params SampleParameters) (Sample, error)
- type OracleConfig
-
type OracleRepository
- func NewOracleRepository(cfg RepoConfig) (*OracleRepository, error)
- func (r *OracleRepository) Close() error
- func (r *OracleRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
- func (r *OracleRepository) ListDatabases(_ context.Context) ([]string, error)
- func (r *OracleRepository) Ping(ctx context.Context) error
- func (r *OracleRepository) SampleTable(ctx context.Context, params SampleParameters) (Sample, error)
-
type PostgresRepository
- func NewPostgresRepository(cfg RepoConfig) (*PostgresRepository, error)
- func (r *PostgresRepository) Close() error
- func (r *PostgresRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
- func (r *PostgresRepository) ListDatabases(ctx context.Context) ([]string, error)
- func (r *PostgresRepository) Ping(ctx context.Context) error
- func (r *PostgresRepository) SampleTable(ctx context.Context, params SampleParameters) (Sample, error)
-
type RedshiftRepository
- func NewRedshiftRepository(cfg RepoConfig) (*RedshiftRepository, error)
- func (r *RedshiftRepository) Close() error
- func (r *RedshiftRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
- func (r *RedshiftRepository) ListDatabases(ctx context.Context) ([]string, error)
- func (r *RedshiftRepository) Ping(ctx context.Context) error
- func (r *RedshiftRepository) SampleTable(ctx context.Context, params SampleParameters) (Sample, error)
-
type Registry
- func NewRegistry() *Registry
- func (r *Registry) MustRegister(repoType string, constructor RepoConstructor)
- func (r *Registry) NewRepository(ctx context.Context, repoType string, cfg RepoConfig) (Repository, error)
- func (r *Registry) Register(repoType string, constructor RepoConstructor) error
- func (r *Registry) Unregister(repoType string)
- type RepoConfig
- type RepoConstructor
- type Repository
- type Sample
- type SampleParameters
- type SampleResult
- type Scanner
- type ScannerConfig
- type SchemaMetadata
- type SnowflakeConfig
-
type SnowflakeRepository
- func NewSnowflakeRepository(cfg RepoConfig) (*SnowflakeRepository, error)
- func (r *SnowflakeRepository) Close() error
- func (r *SnowflakeRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
- func (r *SnowflakeRepository) ListDatabases(ctx context.Context) ([]string, error)
- func (r *SnowflakeRepository) Ping(ctx context.Context) error
- func (r *SnowflakeRepository) SampleTable(ctx context.Context, params SampleParameters) (Sample, error)
-
type SqlServerRepository
- func NewSqlServerRepository(cfg RepoConfig) (*SqlServerRepository, error)
- func (r *SqlServerRepository) Close() error
- func (r *SqlServerRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
- func (r *SqlServerRepository) ListDatabases(ctx context.Context) ([]string, error)
- func (r *SqlServerRepository) Ping(ctx context.Context) error
- func (r *SqlServerRepository) SampleTable(ctx context.Context, params SampleParameters) (Sample, error)
- type TableMetadata
Constants
const ( RepoTypeDenodo = "denodo" )
const ( RepoTypeMysql = "mysql" )
const ( RepoTypeOracle = "oracle" )
const ( RepoTypePostgres = "postgres" )
const ( RepoTypeRedshift = "redshift" )
const ( RepoTypeSnowflake = "snowflake" )
const ( RepoTypeSqlServer = "sqlserver" )
Variables
var ( // DefaultRegistry is the default, global repository registry on which a // number of convenience functions in this package act. All currently // out-of-the-box repository types are registered to this registry by this // package's init function. Users who want to use custom Repository // implementations, or just avoid global state altogether, should use their // own instance of Registry, instead of using DefaultRegistry and the // corresponding convenience functions. DefaultRegistry = NewRegistry() )
Functions
func MustRegister
func MustRegister(repoType string, constructor RepoConstructor)
MustRegister is a convenience function that delegates to DefaultRegistry. See Registry.MustRegister for more details.
func Register
func Register(repoType string, constructor RepoConstructor) error
Register is a convenience function that delegates to DefaultRegistry. See Registry.Register for more details.
func Unregister
func Unregister(repoType string)
Unregister is a convenience function that delegates to DefaultRegistry. See Registry.Unregister for more details.
Types
type AttributeMetadata
type AttributeMetadata struct { Schema string `field:"table_schema"` Table string `field:"table_name"` Name string `field:"column_name"` DataType string `field:"data_type"` }
AttributeMetadata represents the structure of a database attribute (i.e. column). It contains the schema, table, name, and data type of the attribute.
type DenodoRepository
type DenodoRepository struct { // contains filtered or unexported fields }
DenodoRepository is a Repository implementation for Denodo.
func NewDenodoRepository
func NewDenodoRepository(cfg RepoConfig) (*DenodoRepository, error)
NewDenodoRepository is the constructor for DenodoRepository.
func (*DenodoRepository) Close
func (r *DenodoRepository) Close() error
Close delegates the close to GenericRepository. See Repository.Close and GenericRepository.Close for more details.
func (*DenodoRepository) Introspect
func (r *DenodoRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
Introspect delegates introspection to GenericRepository. See Repository.Introspect and GenericRepository.IntrospectWithQuery for more details.
func (*DenodoRepository) ListDatabases
func (r *DenodoRepository) ListDatabases(_ context.Context) ([]string, error)
ListDatabases is left unimplemented for Denodo, because Denodo doesn't have the concept of databases.
func (*DenodoRepository) Ping
func (r *DenodoRepository) Ping(ctx context.Context) error
Ping delegates the ping to GenericRepository. See Repository.Ping and GenericRepository.Ping for more details.
func (*DenodoRepository) SampleTable
func (r *DenodoRepository) SampleTable( ctx context.Context, params SampleParameters, ) (Sample, error)
SampleTable delegates sampling to GenericRepository, using a Denodo-specific table sample query. See Repository.SampleTable and GenericRepository.SampleTableWithQuery for more details.
type GenericRepository
type GenericRepository struct { // contains filtered or unexported fields }
GenericRepository implements generic SQL functionalities that work for a subset of ANSI SQL compatible databases. Many Repository implementations may partially or fully delegate to this implementation. In that respect, it acts somewhat as a base implementation which can be used by SQL-compatible repositories. Note that while GenericRepository is an implementation of the Repository interface, GenericRepository is meant to be used as a building block for other Repository implementations, rather than as a standalone implementation. Specifically, the Repository.ListDatabases method is left un-implemented, since there is no standard way to list databases across different SQL database platforms. It does however provide the ListDatabasesWithQuery method, which dependent implementations can use to provide a custom query to list databases.
func NewGenericRepository
func NewGenericRepository(repoType, database, connStr string, maxOpenConns uint) ( *GenericRepository, error, )
NewGenericRepository is a constructor for the GenericRepository type. It opens a database handle for a given repoType and returns a pointer to a new GenericRepository instance. A connection may or may not be established depending on the underlying database type, as determined by the repoType parameter. The maxOpenConns parameter specifies the maximum number of open connections to the database. Filtering of the tables that are introspected is not configured here; include and exclude patterns are supplied through the IncludePaths and ExcludePaths fields of the IntrospectParameters passed to Introspect.
func NewGenericRepositoryFromDB
func NewGenericRepositoryFromDB(repoType, database string, db *sql.DB) *GenericRepository
NewGenericRepositoryFromDB instantiates a new GenericRepository based on a given sql.DB handle.
func (*GenericRepository) Close
func (r *GenericRepository) Close() error
Close closes the database connection used by the repository.
func (*GenericRepository) GetDb
func (r *GenericRepository) GetDb() *sql.DB
GetDb is a getter for the repository's sql.DB handle.
func (*GenericRepository) Introspect
func (r *GenericRepository) Introspect( ctx context.Context, params IntrospectParameters, ) (*Metadata, error)
Introspect calls IntrospectWithQuery with a default query string.
func (*GenericRepository) IntrospectWithQuery
func (r *GenericRepository) IntrospectWithQuery( ctx context.Context, query string, params IntrospectParameters, ) (*Metadata, error)
IntrospectWithQuery executes a query against the information_schema table in the database which returns a four-column (all varchar) row set (of N rows, depending on the number of tables in the database) in the form:
table_schema, table_name, column_name, data_type
This row set represents all the columns of all the tables in the repository. The row set is then parsed into an instance of Metadata and returned. Additionally, any errors which occur during the query execution or parsing process will be returned.
func (*GenericRepository) ListDatabases
func (r *GenericRepository) ListDatabases(_ context.Context) ([]string, error)
ListDatabases is left unimplemented for GenericRepository, because there is no standard way to list databases across different SQL database platforms. See ListDatabasesWithQuery for a way to list databases using a custom query.
func (*GenericRepository) ListDatabasesWithQuery
func (r *GenericRepository) ListDatabasesWithQuery( ctx context.Context, query string, params ...any, ) ([]string, error)
ListDatabasesWithQuery returns a list of the names of all databases on the server, as determined by the given query. The query is expected to return a row set containing a single column corresponding to the database name. If the query returns more than one column, an error will be returned.
func (*GenericRepository) Ping
func (r *GenericRepository) Ping(ctx context.Context) error
Ping verifies the connection to the database used by this repository by executing a simple query. If the query fails, an error is returned.
func (*GenericRepository) SampleTable
func (r *GenericRepository) SampleTable( ctx context.Context, params SampleParameters, ) (Sample, error)
SampleTable samples the table referenced by the Metadata field of the params parameter by issuing a standard, ANSI-compatible SELECT query to the database. All attributes of the table are selected, and are quoted using double quotes. See Repository.SampleTable for more details.
func (*GenericRepository) SampleTableWithQuery
func (r *GenericRepository) SampleTableWithQuery( ctx context.Context, query string, params SampleParameters, ) (Sample, error)
SampleTableWithQuery calls SampleTable with a custom SQL query. Any placeholder parameters in the query should be passed via params.
type IntrospectParameters
type IntrospectParameters struct { // IncludePaths is a list of glob patterns that will be used to filter // the tables that will be introspected. If a table name matches any of // the patterns in this list, it will be included in the repository // metadata. IncludePaths []glob.Glob // ExcludePaths is a list of glob patterns that will be used to filter // the tables that will be introspected. If a table name matches any of // the patterns in this list, it will be excluded from the repository // metadata. ExcludePaths []glob.Glob }
IntrospectParameters is a struct that holds the parameters for the Introspect method of the Repository interface.
type Metadata
type Metadata struct { RepoType string Database string Schemas map[string]*SchemaMetadata }
Metadata represents the structure of a SQL database. The traditional hierarchy is: Server (cluster) > Database > Schema (namespace) > Table. Some database systems do not have the concept of a "database" (e.g. MySQL). In those cases, the 'Database' field is expected to be an empty string. See: https://stackoverflow.com/a/17943883
func NewMetadata
func NewMetadata(database string) *Metadata
NewMetadata creates a new Metadata object with the given database name and an empty map of schemas.
type MySqlRepository
type MySqlRepository struct { // contains filtered or unexported fields }
MySqlRepository is a Repository implementation for MySQL databases.
func NewMySqlRepository
func NewMySqlRepository(cfg RepoConfig) (*MySqlRepository, error)
NewMySqlRepository creates a new MySQL repository.
func (*MySqlRepository) Close
func (r *MySqlRepository) Close() error
Close delegates the close to GenericRepository. See Repository.Close and GenericRepository.Close for more details.
func (*MySqlRepository) Introspect
func (r *MySqlRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
Introspect delegates introspection to GenericRepository. See Repository.Introspect and GenericRepository.IntrospectWithQuery for more details.
func (*MySqlRepository) ListDatabases
func (r *MySqlRepository) ListDatabases(ctx context.Context) ([]string, error)
ListDatabases returns a list of the names of all databases on the server by using a MySQL-specific database query. It delegates the actual work to GenericRepository.ListDatabasesWithQuery - see that method for more details.
func (*MySqlRepository) Ping
func (r *MySqlRepository) Ping(ctx context.Context) error
Ping delegates the ping to GenericRepository. See Repository.Ping and GenericRepository.Ping for more details.
func (*MySqlRepository) SampleTable
func (r *MySqlRepository) SampleTable( ctx context.Context, params SampleParameters, ) (Sample, error)
SampleTable delegates sampling to GenericRepository, using a MySQL-specific table sample query. See Repository.SampleTable and GenericRepository.SampleTableWithQuery for more details.
type OracleConfig
type OracleConfig struct { // ServiceName is the Oracle service name. ServiceName string }
OracleConfig is a struct to hold Oracle-specific configuration.
func NewOracleConfigFromMap
func NewOracleConfigFromMap(cfg map[string]any) (OracleConfig, error)
NewOracleConfigFromMap creates a new OracleConfig from the given map. This is useful for parsing the Oracle-specific configuration from the RepoConfig.Advanced map, for example.
type OracleRepository
type OracleRepository struct { // contains filtered or unexported fields }
OracleRepository is a Repository implementation for Oracle databases.
func NewOracleRepository
func NewOracleRepository(cfg RepoConfig) (*OracleRepository, error)
NewOracleRepository creates a new Oracle repository.
func (*OracleRepository) Close
func (r *OracleRepository) Close() error
Close delegates the close to GenericRepository. See Repository.Close and GenericRepository.Close for more details.
func (*OracleRepository) Introspect
func (r *OracleRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
Introspect delegates introspection to GenericRepository, using an Oracle-specific introspection query. See Repository.Introspect and GenericRepository.IntrospectWithQuery for more details.
func (*OracleRepository) ListDatabases
func (r *OracleRepository) ListDatabases(_ context.Context) ([]string, error)
ListDatabases is left unimplemented for Oracle, because Oracle doesn't have the traditional concept of "databases". Note that Introspect already identifies all the accessible objects on the server.
func (*OracleRepository) Ping
func (r *OracleRepository) Ping(ctx context.Context) error
Ping verifies the connection to the Oracle database used by this Oracle repository. Normally we would just delegate to GenericRepository.Ping, however, that implementation executes a 'SELECT 1' query to test for connectivity, and Oracle being Oracle does not like this. Instead, we defer to the native Ping method implemented by the Oracle DB driver.
func (*OracleRepository) SampleTable
func (r *OracleRepository) SampleTable( ctx context.Context, params SampleParameters, ) (Sample, error)
SampleTable delegates sampling to GenericRepository, using an Oracle-specific table sample query. See Repository.SampleTable and GenericRepository.SampleTableWithQuery for more details.
type PostgresRepository
type PostgresRepository struct { // contains filtered or unexported fields }
PostgresRepository is a Repository implementation for Postgres databases.
func NewPostgresRepository
func NewPostgresRepository(cfg RepoConfig) (*PostgresRepository, error)
NewPostgresRepository creates a new PostgresRepository.
func (*PostgresRepository) Close
func (r *PostgresRepository) Close() error
Close delegates the close to GenericRepository. See Repository.Close and GenericRepository.Close for more details.
func (*PostgresRepository) Introspect
func (r *PostgresRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
Introspect delegates introspection to GenericRepository. See Repository.Introspect and GenericRepository.IntrospectWithQuery for more details.
func (*PostgresRepository) ListDatabases
func (r *PostgresRepository) ListDatabases(ctx context.Context) ([]string, error)
ListDatabases returns a list of the names of all databases on the server by using a Postgres-specific database query. It delegates the actual work to GenericRepository.ListDatabasesWithQuery - see that method for more details.
func (*PostgresRepository) Ping
func (r *PostgresRepository) Ping(ctx context.Context) error
Ping delegates the ping to GenericRepository. See Repository.Ping and GenericRepository.Ping for more details.
func (*PostgresRepository) SampleTable
func (r *PostgresRepository) SampleTable( ctx context.Context, params SampleParameters, ) (Sample, error)
SampleTable delegates sampling to GenericRepository, using a Postgres-specific table sample query. See Repository.SampleTable and GenericRepository.SampleTableWithQuery for more details.
type RedshiftRepository
type RedshiftRepository struct { // contains filtered or unexported fields }
RedshiftRepository is a Repository implementation for Redshift databases.
func NewRedshiftRepository
func NewRedshiftRepository(cfg RepoConfig) (*RedshiftRepository, error)
NewRedshiftRepository creates a new RedshiftRepository.
func (*RedshiftRepository) Close
func (r *RedshiftRepository) Close() error
Close delegates the close to GenericRepository. See Repository.Close and GenericRepository.Close for more details.
func (*RedshiftRepository) Introspect
func (r *RedshiftRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
Introspect delegates introspection to GenericRepository. See Repository.Introspect and GenericRepository.IntrospectWithQuery for more details.
func (*RedshiftRepository) ListDatabases
func (r *RedshiftRepository) ListDatabases(ctx context.Context) ([]string, error)
ListDatabases returns a list of the names of all databases on the server by using a Redshift-specific database query. It delegates the actual work to GenericRepository.ListDatabasesWithQuery - see that method for more details.
func (*RedshiftRepository) Ping
func (r *RedshiftRepository) Ping(ctx context.Context) error
Ping delegates the ping to GenericRepository. See Repository.Ping and GenericRepository.Ping for more details.
func (*RedshiftRepository) SampleTable
func (r *RedshiftRepository) SampleTable( ctx context.Context, params SampleParameters, ) (Sample, error)
SampleTable delegates sampling to GenericRepository, using a Redshift-specific table sample query. See Repository.SampleTable and GenericRepository.SampleTableWithQuery for more details.
type Registry
type Registry struct { // contains filtered or unexported fields }
Registry is a repository registry that maps repository types to their respective constructor functions. It is used to create new repository instances based on the repository type. It is not thread-safe.
func NewRegistry
func NewRegistry() *Registry
NewRegistry creates a new Registry instance.
func (*Registry) MustRegister
func (r *Registry) MustRegister(repoType string, constructor RepoConstructor)
MustRegister is the same as Registry.Register, but panics if an error occurs.
func (*Registry) NewRepository
func (r *Registry) NewRepository(ctx context.Context, repoType string, cfg RepoConfig) (Repository, error)
NewRepository is a factory method to return a concrete Repository implementation based on the specified type, e.g. MySQL, Postgres, SQL Server, etc., which must be registered with the registry. If the repository type is not registered, an error is returned. A new instance of the repository is returned each time this method is called. Note that NewRepository is not thread-safe.
func (*Registry) Register
func (r *Registry) Register(repoType string, constructor RepoConstructor) error
Register makes a repository available by the provided repository type. If Register is called twice with the same repoType, or if constructor is nil, it returns an error. Note that Register is not thread-safe.
func (*Registry) Unregister
func (r *Registry) Unregister(repoType string)
Unregister removes a repository type from the registry. If the repository type is not registered, this method is a no-op. Note that Unregister is not thread-safe.
type RepoConfig
type RepoConfig struct { // Host is the hostname of the database. Host string // Port is the port of the database. Port uint16 // User is the username to connect to the database. User string // Password is the password to connect to the database. Password string // Database is the name of the database to connect to. Database string // MaxOpenConns is the maximum number of open connections to the database. MaxOpenConns uint // Advanced is a map of advanced configuration options. Advanced map[string]any }
RepoConfig is the necessary configuration to connect to a data repository.
type RepoConstructor
type RepoConstructor func(ctx context.Context, cfg RepoConfig) (Repository, error)
RepoConstructor represents the function signature that all repository implementations should use for their constructor functions.
type Repository
type Repository interface { // ListDatabases returns a list of the names of all databases on the server. ListDatabases(ctx context.Context) ([]string, error) // Introspect will read and analyze the basic properties of the repository // and return as a Metadata instance. This includes all the repository's // databases, schemas, tables, columns, and attributes. Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error) // SampleTable samples the table referenced by the TableMetadata meta // parameter and returns the sample as a slice of Sample. The parameters for // the sample, such as sample size, are passed via the params parameter (see // SampleParameters for more details). The returned sample result set // contains one Sample for each table row sampled. The length of the results // will be less than or equal to the sample size. If there are fewer results // than the specified sample size, it is because the table in question had a // row count less than the sample size. Prefer small sample sizes to limit // impact on the database. SampleTable(ctx context.Context, params SampleParameters) (Sample, error) // Ping is meant to be used as a general purpose connectivity test. It // should be invoked e.g. in the dry-run mode. Ping(ctx context.Context) error // Close is meant to be used as a general purpose cleanup. It should be // invoked when the Repository is no longer used. Close() error }
Repository represents a Dmap data SQL repository, and provides functionality to introspect its corresponding schema.
func NewRepository
func NewRepository(ctx context.Context, repoType string, cfg RepoConfig) (Repository, error)
NewRepository is a convenience function that delegates to DefaultRegistry. See Registry.NewRepository for more details.
type Sample
type Sample struct { // TablePath is the full path of the data repository table that was sampled. // Each element corresponds to a component, in increasing order of // granularity (e.g. [database, schema, table]). TablePath []string // Results is the set of sample results. Each SampleResult is equivalent to // a database row, where the map key is the column name and the map value is // the column value. Results []SampleResult }
Sample represents a sample of data from a database table.
type SampleParameters
type SampleParameters struct { // Metadata is the metadata for the table to be sampled. Metadata *TableMetadata // SampleSize is the number of rows to sample from the table. SampleSize uint // Offset is the number of rows to skip before starting the sample. Offset uint }
SampleParameters contains all parameters necessary to sample a table.
type SampleResult
type SampleResult map[string]any
SampleResult stores the results from a single database sample. It is equivalent to a database row, where the map key is the column name and the map value is the column value.
type Scanner
type Scanner struct { // contains filtered or unexported fields }
Scanner is a data discovery scanner that scans a data repository for sensitive data. It also classifies the data and publishes the results to the configured external sources. It currently only supports SQL-based repositories.
func NewScanner
func NewScanner(ctx context.Context, cfg ScannerConfig) (*Scanner, error)
NewScanner creates a new Scanner instance with the provided configuration.
func (*Scanner) Scan
func (s *Scanner) Scan(ctx context.Context) (*scan.RepoScanResults, error)
Scan performs the data repository scan. It introspects and samples the repository, classifies the sampled data, and publishes the results to the configured classification publisher.
type ScannerConfig
type ScannerConfig struct { RepoType string RepoConfig RepoConfig Registry *Registry IncludePaths, ExcludePaths []glob.Glob SampleSize uint Offset uint LabelsYamlFilename string }
ScannerConfig is the configuration for the Scanner.
type SchemaMetadata
type SchemaMetadata struct { Name string Tables map[string]*TableMetadata }
SchemaMetadata represents the structure of a database schema. It contains a map of tables that belong to the schema. The key is the table name and the value is the table metadata for that table.
func NewSchemaMetadata
func NewSchemaMetadata(schemaName string) *SchemaMetadata
NewSchemaMetadata creates a new SchemaMetadata object with the given schema name and an empty map of tables.
type SnowflakeConfig
type SnowflakeConfig struct { // Account is the Snowflake account name. Account string // Role is the Snowflake role name. Role string // Warehouse is the Snowflake warehouse name. Warehouse string }
SnowflakeConfig holds Snowflake-specific configuration parameters.
func NewSnowflakeConfigFromMap
func NewSnowflakeConfigFromMap(cfg map[string]any) (SnowflakeConfig, error)
NewSnowflakeConfigFromMap creates a new SnowflakeConfig from the given map. This is useful for parsing the Snowflake-specific configuration from the RepoConfig.Advanced map, for example.
type SnowflakeRepository
type SnowflakeRepository struct { // contains filtered or unexported fields }
SnowflakeRepository is a Repository implementation for Snowflake databases.
func NewSnowflakeRepository
func NewSnowflakeRepository(cfg RepoConfig) (*SnowflakeRepository, error)
NewSnowflakeRepository creates a new SnowflakeRepository.
func (*SnowflakeRepository) Close
func (r *SnowflakeRepository) Close() error
Close delegates the close to GenericRepository. See Repository.Close and GenericRepository.Close for more details.
func (*SnowflakeRepository) Introspect
func (r *SnowflakeRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
Introspect delegates introspection to GenericRepository. See Repository.Introspect and GenericRepository.IntrospectWithQuery for more details.
func (*SnowflakeRepository) ListDatabases
func (r *SnowflakeRepository) ListDatabases(ctx context.Context) ([]string, error)
ListDatabases returns a list of the names of all databases on the server by using a Snowflake-specific database query. It delegates the actual work to GenericRepository.ListDatabasesWithQuery - see that method for more details.
func (*SnowflakeRepository) Ping
func (r *SnowflakeRepository) Ping(ctx context.Context) error
Ping delegates the ping to GenericRepository. See Repository.Ping and GenericRepository.Ping for more details.
func (*SnowflakeRepository) SampleTable
func (r *SnowflakeRepository) SampleTable( ctx context.Context, params SampleParameters, ) (Sample, error)
SampleTable delegates sampling to GenericRepository. See Repository.SampleTable and GenericRepository.SampleTable for more details.
type SqlServerRepository
type SqlServerRepository struct { // contains filtered or unexported fields }
SqlServerRepository is a Repository implementation for MS SQL Server databases.
func NewSqlServerRepository
func NewSqlServerRepository(cfg RepoConfig) (*SqlServerRepository, error)
NewSqlServerRepository creates a new MS SQL Server repository.
func (*SqlServerRepository) Close
func (r *SqlServerRepository) Close() error
Close delegates the close to GenericRepository. See Repository.Close and GenericRepository.Close for more details.
func (*SqlServerRepository) Introspect
func (r *SqlServerRepository) Introspect(ctx context.Context, params IntrospectParameters) (*Metadata, error)
Introspect delegates introspection to GenericRepository. See Repository.Introspect and GenericRepository.IntrospectWithQuery for more details.
func (*SqlServerRepository) ListDatabases
func (r *SqlServerRepository) ListDatabases(ctx context.Context) ([]string, error)
ListDatabases returns a list of the names of all databases on the server by using a SQL Server-specific database query. It delegates the actual work to GenericRepository.ListDatabasesWithQuery - see that method for more details.
func (*SqlServerRepository) Ping
func (r *SqlServerRepository) Ping(ctx context.Context) error
Ping delegates the ping to GenericRepository. See Repository.Ping and GenericRepository.Ping for more details.
func (*SqlServerRepository) SampleTable
func (r *SqlServerRepository) SampleTable( ctx context.Context, params SampleParameters, ) (Sample, error)
SampleTable delegates sampling to GenericRepository, using a SQL Server-specific table sample query. See Repository.SampleTable and GenericRepository.SampleTableWithQuery for more details.
type TableMetadata
type TableMetadata struct { Schema string Name string Attributes []*AttributeMetadata }
TableMetadata represents the structure of a database table. It contains a slice of attributes (i.e. columns) that belong to the table.
func NewTableMetadata
func NewTableMetadata(schemaName, tableName string) *TableMetadata
NewTableMetadata creates a new TableMetadata object with the given schema and table name, and an empty slice of attributes.
func (*TableMetadata) AttributeNames
func (t *TableMetadata) AttributeNames() []string
AttributeNames returns a slice of attribute names for the table.
func (*TableMetadata) QuotedAttributeNamesString
func (t *TableMetadata) QuotedAttributeNamesString(quoteChar string) string
QuotedAttributeNamesString returns a string of comma-separated attribute names for the table, with each name quoted using the given quote character.