Skip to content

Commit

Permalink
refactor: enhance Spider model and string utility functions
Browse files Browse the repository at this point in the history
- Updated the Spider model to introduce a new SpiderTemplateParams struct for improved template handling.
- Refactored string utility functions in utils/string.go to include a new replaceChars function, streamlining character replacement across multiple functions.
- Enhanced ToSnakeCase and ToKebabCase functions to utilize the new replaceChars function for better maintainability and readability.
- Added splitStringWithQuotes function to facilitate string manipulation with quotes, improving overall utility in string processing.
  • Loading branch information
Marvin Zhang committed Jan 7, 2025
1 parent c3c629a commit c937e0f
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 24 deletions.
41 changes: 22 additions & 19 deletions core/models/models/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,21 @@ import (
type Spider struct {
any `collection:"spiders"`
BaseModel[Spider] `bson:",inline"`
Name string `json:"name" bson:"name"` // spider name
ColId primitive.ObjectID `json:"col_id" bson:"col_id"` // data collection id (deprecated) # TODO: remove this field in the future
ColName string `json:"col_name,omitempty" bson:"col_name"` // data collection name
DbName string `json:"db_name,omitempty" bson:"db_name"` // database name
DataSourceId primitive.ObjectID `json:"data_source_id" bson:"data_source_id"` // data source id
DataSource *Database `json:"data_source,omitempty" bson:"-"` // data source
Description string `json:"description" bson:"description"` // description
ProjectId primitive.ObjectID `json:"project_id" bson:"project_id"` // Project.Id
Mode string `json:"mode" bson:"mode"` // default Task.Mode
NodeIds []primitive.ObjectID `json:"node_ids" bson:"node_ids"` // default Task.NodeIds
GitId primitive.ObjectID `json:"git_id" bson:"git_id"` // related Git.Id
GitRootPath string `json:"git_root_path" bson:"git_root_path"`
Git *Git `json:"git,omitempty" bson:"-"`
Template string `json:"template,omitempty" bson:"template,omitempty"` // spider template
TemplateParams *struct {
SpiderName string `json:"spider_name,omitempty" bson:"spider_name,omitempty"`
StartUrls string `json:"start_urls,omitempty" bson:"start_urls,omitempty"`
Domains string `json:"domains,omitempty" bson:"domains,omitempty"`
} `json:"template_params,omitempty" bson:"template_params,omitempty"`
Name string `json:"name" bson:"name"` // spider name
ColId primitive.ObjectID `json:"col_id" bson:"col_id"` // data collection id (deprecated) # TODO: remove this field in the future
ColName string `json:"col_name,omitempty" bson:"col_name"` // data collection name
DbName string `json:"db_name,omitempty" bson:"db_name"` // database name
DataSourceId primitive.ObjectID `json:"data_source_id" bson:"data_source_id"` // data source id
DataSource *Database `json:"data_source,omitempty" bson:"-"` // data source
Description string `json:"description" bson:"description"` // description
ProjectId primitive.ObjectID `json:"project_id" bson:"project_id"` // Project.Id
Mode string `json:"mode" bson:"mode"` // default Task.Mode
NodeIds []primitive.ObjectID `json:"node_ids" bson:"node_ids"` // default Task.NodeIds
GitId primitive.ObjectID `json:"git_id" bson:"git_id"` // related Git.Id
GitRootPath string `json:"git_root_path" bson:"git_root_path"`
Git *Git `json:"git,omitempty" bson:"-"`
Template string `json:"template,omitempty" bson:"template,omitempty"` // spider template
TemplateParams *SpiderTemplateParams `json:"template_params,omitempty" bson:"template_params,omitempty"`

// stats
Stat *SpiderStat `json:"stat,omitempty" bson:"-"`
Expand All @@ -36,3 +32,10 @@ type Spider struct {
Priority int `json:"priority" bson:"priority"`
AutoInstall bool `json:"auto_install" bson:"auto_install"`
}

// SpiderTemplateParams holds the parameters that accompany a spider template;
// it is the type of the Spider.TemplateParams field. Every field is a plain
// string and is omitted from both the JSON and BSON encodings when empty.
type SpiderTemplateParams struct {
ProjectName string `json:"project_name,omitempty" bson:"project_name,omitempty"` // project name passed to the template
SpiderName string `json:"spider_name,omitempty" bson:"spider_name,omitempty"` // spider name passed to the template
StartUrls string `json:"start_urls,omitempty" bson:"start_urls,omitempty"` // NOTE(review): presumably a comma-separated URL list — confirm against the template renderer
AllowedDomains string `json:"allowed_domains,omitempty" bson:"allowed_domains,omitempty"` // NOTE(review): presumably a comma-separated domain list — confirm against the template renderer
}
54 changes: 49 additions & 5 deletions core/utils/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,29 @@ import (
"strings"
)

// replaceChars substitutes every occurrence of each entry of o in s with r.
//
// Parameters:
//   - s: the input string
//   - o: the substrings to look for, processed one after another in slice order
//   - r: the text written in place of each match
//
// The substitutions are applied sequentially, so the output of one pass is the
// input of the next.
//
// Example:
//
//	replaceChars("a-b-c", []string{"-"}, "_") => "a_b_c"
//
// Returns:
//   - s with all substitutions applied (s itself when o is empty)
func replaceChars(s string, o []string, r string) string {
	result := s
	for i := 0; i < len(o); i++ {
		result = strings.ReplaceAll(result, o[i], r)
	}
	return result
}

// ToSnakeCase trims surrounding whitespace from s, lowercases it, and replaces
// every space, hyphen, and dot with an underscore.
//
// Example:
//
//	ToSnakeCase(" My-Spider v1.0 ") => "my_spider_v1_0"
func ToSnakeCase(s string) string {
	s = strings.TrimSpace(s)
	s = strings.ToLower(s)
	// A single replaceChars call covers all separators; the previous hand-rolled
	// replacements followed by an early return left the "." handling unreachable.
	return replaceChars(s, []string{" ", "-", "."}, "_")
}

func ToPascalCase(s string) string {
Expand All @@ -25,7 +42,34 @@ func ToPascalCase(s string) string {
// ToKebabCase trims surrounding whitespace from s, lowercases it, and replaces
// every space, underscore, and dot with a hyphen.
//
// Example:
//
//	ToKebabCase(" My_Spider v1.0 ") => "my-spider-v1-0"
func ToKebabCase(s string) string {
	lowered := strings.ToLower(strings.TrimSpace(s))
	return replaceChars(lowered, []string{" ", "_", "."}, "-")
}

// splitStringWithQuotes wraps every delimiter-separated item of s in quotes and
// joins the quoted items with a new separator.
//
// Parameters:
//   - s: the string to split
//   - q: the quote character placed around each item
//   - d: the delimiter that separates the items in s
//   - r: the separator written between the quoted items; it must NOT contain
//     the quote character, because the quotes are added here
//
// Surrounding whitespace is trimmed and every space character is removed from s
// before splitting, so items cannot contain spaces. An empty s yields one empty
// quoted item (q+q).
//
// Example:
//
//	splitStringWithQuotes("a,b,c", "'", ",", ", ") => "'a', 'b', 'c'"
//
// Returns:
//   - the quoted, re-joined string
func splitStringWithQuotes(s, q, d, r string) string {
	s = strings.TrimSpace(s)
	s = strings.ReplaceAll(s, " ", "")
	// Each delimiter closes the current item's quote, emits the separator, and
	// opens the next item's quote.
	s = strings.ReplaceAll(s, d, q+r+q)
	return q + s + q
}

// SplitStringWithSingleQuotes converts a comma-separated string into a list of
// single-quoted items separated by ", ".
//
// Example:
//
//	SplitStringWithSingleQuotes("a,b,c") => "'a', 'b', 'c'"
func SplitStringWithSingleQuotes(s string) string {
	return splitStringWithQuotes(s, "'", ",", ", ")
}

// SplitStringWithDoubleQuotes converts a comma-separated string into a list of
// double-quoted items separated by ", ".
//
// Example:
//
//	SplitStringWithDoubleQuotes("a,b,c") => "\"a\", \"b\", \"c\""
func SplitStringWithDoubleQuotes(s string) string {
	// The separator is passed without quotes: splitStringWithQuotes already wraps
	// it in the quote character, so passing `", "` with quotes doubled them.
	return splitStringWithQuotes(s, "\"", ",", ", ")
}

0 comments on commit c937e0f

Please sign in to comment.