From c937e0f45f03a2d83597a38e236d37f96fbf0cb7 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Tue, 7 Jan 2025 13:21:16 +0800
Subject: [PATCH] refactor: enhance Spider model and string utility functions

- Updated the Spider model to introduce a new SpiderTemplateParams struct for improved template handling.
- Refactored string utility functions in utils/string.go to include a new replaceChars function, streamlining character replacement across multiple functions.
- Enhanced ToSnakeCase and ToKebabCase functions to utilize the new replaceChars function for better maintainability and readability.
- Added splitStringWithQuotes function to facilitate string manipulation with quotes, improving overall utility in string processing.
---
 core/models/models/spider.go | 42 ++++++++++++++------------
 core/utils/string.go         | 58 ++++++++++++++++++++++++++++++++----
 2 files changed, 76 insertions(+), 24 deletions(-)

diff --git a/core/models/models/spider.go b/core/models/models/spider.go
index 684e34c0..ddc0be16 100644
--- a/core/models/models/spider.go
+++ b/core/models/models/spider.go
@@ -7,25 +7,21 @@ import (
 type Spider struct {
 	any               `collection:"spiders"`
 	BaseModel[Spider] `bson:",inline"`
-	Name              string               `json:"name" bson:"name"`                     // spider name
-	ColId             primitive.ObjectID   `json:"col_id" bson:"col_id"`                 // data collection id (deprecated) # TODO: remove this field in the future
-	ColName           string               `json:"col_name,omitempty" bson:"col_name"`   // data collection name
-	DbName            string               `json:"db_name,omitempty" bson:"db_name"`     // database name
-	DataSourceId      primitive.ObjectID   `json:"data_source_id" bson:"data_source_id"` // data source id
-	DataSource        *Database            `json:"data_source,omitempty" bson:"-"`       // data source
-	Description       string               `json:"description" bson:"description"`       // description
-	ProjectId         primitive.ObjectID   `json:"project_id" bson:"project_id"`         // Project.Id
-	Mode              string               `json:"mode" bson:"mode"`                     // default Task.Mode
-	NodeIds           []primitive.ObjectID `json:"node_ids" bson:"node_ids"`             // default Task.NodeIds
-	GitId             primitive.ObjectID   `json:"git_id" bson:"git_id"`                 // related Git.Id
-	GitRootPath       string               `json:"git_root_path" bson:"git_root_path"`
-	Git               *Git                 `json:"git,omitempty" bson:"-"`
-	Template          string               `json:"template,omitempty" bson:"template,omitempty"` // spider template
-	TemplateParams    *struct {
-		SpiderName string `json:"spider_name,omitempty" bson:"spider_name,omitempty"`
-		StartUrls  string `json:"start_urls,omitempty" bson:"start_urls,omitempty"`
-		Domains    string `json:"domains,omitempty" bson:"domains,omitempty"`
-	} `json:"template_params,omitempty" bson:"template_params,omitempty"`
+	Name              string                `json:"name" bson:"name"`                     // spider name
+	ColId             primitive.ObjectID    `json:"col_id" bson:"col_id"`                 // data collection id (deprecated) # TODO: remove this field in the future
+	ColName           string                `json:"col_name,omitempty" bson:"col_name"`   // data collection name
+	DbName            string                `json:"db_name,omitempty" bson:"db_name"`     // database name
+	DataSourceId      primitive.ObjectID    `json:"data_source_id" bson:"data_source_id"` // data source id
+	DataSource        *Database             `json:"data_source,omitempty" bson:"-"`       // data source
+	Description       string                `json:"description" bson:"description"`       // description
+	ProjectId         primitive.ObjectID    `json:"project_id" bson:"project_id"`         // Project.Id
+	Mode              string                `json:"mode" bson:"mode"`                     // default Task.Mode
+	NodeIds           []primitive.ObjectID  `json:"node_ids" bson:"node_ids"`             // default Task.NodeIds
+	GitId             primitive.ObjectID    `json:"git_id" bson:"git_id"`                 // related Git.Id
+	GitRootPath       string                `json:"git_root_path" bson:"git_root_path"`
+	Git               *Git                  `json:"git,omitempty" bson:"-"`
+	Template          string                `json:"template,omitempty" bson:"template,omitempty"` // spider template
+	TemplateParams    *SpiderTemplateParams `json:"template_params,omitempty" bson:"template_params,omitempty"`
 
 	// stats
 	Stat *SpiderStat `json:"stat,omitempty" bson:"-"`
@@ -36,3 +32,11 @@ type Spider struct {
 	Priority    int    `json:"priority" bson:"priority"`
 	AutoInstall bool   `json:"auto_install" bson:"auto_install"`
 }
+
+// SpiderTemplateParams holds the optional parameters stored in Spider.TemplateParams.
+type SpiderTemplateParams struct {
+	ProjectName    string `json:"project_name,omitempty" bson:"project_name,omitempty"`
+	SpiderName     string `json:"spider_name,omitempty" bson:"spider_name,omitempty"`
+	StartUrls      string `json:"start_urls,omitempty" bson:"start_urls,omitempty"`
+	AllowedDomains string `json:"allowed_domains,omitempty" bson:"allowed_domains,omitempty"`
+}
diff --git a/core/utils/string.go b/core/utils/string.go
index b2b6c345..ed726045 100644
--- a/core/utils/string.go
+++ b/core/utils/string.go
@@ -6,12 +6,29 @@ import (
 	"strings"
 )
 
+// replaceChars replaces every occurrence of the given substrings in a string
+// Parameters:
+//   - s: the string to replace substrings in
+//   - o: the substrings to replace, applied in order
+//   - r: the replacement string
+//
+// Example:
+//
+//	replaceChars("a-b-c", []string{"-"}, "_") => "a_b_c"
+//
+// Returns:
+//   - the string with all listed substrings replaced by r
+func replaceChars(s string, o []string, r string) string {
+	for _, c := range o {
+		s = strings.ReplaceAll(s, c, r)
+	}
+	return s
+}
+
 func ToSnakeCase(s string) string {
 	s = strings.TrimSpace(s)
 	s = strings.ToLower(s)
-	s = strings.ReplaceAll(s, " ", "_")
-	s = strings.ReplaceAll(s, "-", "_")
-	return s
+	return replaceChars(s, []string{" ", "-", "."}, "_")
 }
 
 func ToPascalCase(s string) string {
@@ -25,7 +42,38 @@ func ToPascalCase(s string) string {
 func ToKebabCase(s string) string {
 	s = strings.TrimSpace(s)
 	s = strings.ToLower(s)
-	s = strings.ReplaceAll(s, " ", "-")
-	s = strings.ReplaceAll(s, "_", "-")
+	return replaceChars(s, []string{" ", "_", "."}, "-")
+}
+
+// splitStringWithQuotes quotes every delimited item of a string
+// Parameters:
+//   - s: the string to split (all spaces are removed first)
+//   - q: the quote character put around every item
+//   - d: the delimiter that separates the items in s
+//   - r: the separator written between quoted items (must not contain q)
+//
+// Example:
+//
+//	splitStringWithQuotes("a,b,c", "'", ",", ", ") => "'a', 'b', 'c'"
+//
+// Returns:
+//   - the quoted items joined by r
+func splitStringWithQuotes(s, q, d, r string) string {
+	s = strings.TrimSpace(s)
+	s = strings.ReplaceAll(s, " ", "")
+	s = strings.ReplaceAll(s, d, q+r+q)
+	s = q + s + q
 	return s
 }
+
+// SplitStringWithSingleQuotes wraps each comma-separated item of s in single
+// quotes and joins the items with ", ", e.g. a,b,c => 'a', 'b', 'c'.
+func SplitStringWithSingleQuotes(s string) string {
+	return splitStringWithQuotes(s, "'", ",", ", ")
+}
+
+// SplitStringWithDoubleQuotes wraps each comma-separated item of s in double
+// quotes and joins the items with ", ", e.g. a,b,c => "a", "b", "c".
+func SplitStringWithDoubleQuotes(s string) string {
+	return splitStringWithQuotes(s, "\"", ",", ", ")
+}