-
Notifications
You must be signed in to change notification settings - Fork 110
/
parse.go
424 lines (368 loc) · 12 KB
/
parse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
package jar
import (
"archive/zip"
"bufio"
"crypto/sha1"
"encoding/hex"
"fmt"
dio "github.com/aquasecurity/go-dep-parser/pkg/io"
"github.com/aquasecurity/go-dep-parser/pkg/log"
"github.com/aquasecurity/go-dep-parser/pkg/types"
"github.com/samber/lo"
"go.uber.org/zap"
"golang.org/x/xerrors"
"io"
"os"
"path"
"path/filepath"
"regexp"
"strings"
)
var (
	// jarFileRegEx splits a JAR file name into an artifactId (group 1) and a version (group 2),
	// e.g. "spring-core-5.3.4-SNAPSHOT.jar" => "spring-core", "5.3.4-SNAPSHOT".
	// The dot of the extension is escaped so that only a literal ".jar" suffix matches;
	// an unescaped dot would accept any character in that position (e.g. "foo-1.0xjar").
	jarFileRegEx = regexp.MustCompile(`^([a-zA-Z0-9\._-]*[^-*])-(\d\S*(?:-SNAPSHOT)?)\.jar$`)
)
// Client looks up Maven artifact coordinates on behalf of the Parser.
// It is only consulted when the Parser is not in offline mode.
// NOTE(review): implementations presumably query a remote repository such as Maven Central — confirm.
type Client interface {
	// Exists reports whether an artifact with the given groupID and artifactID is known.
	Exists(groupID, artifactID string) (bool, error)
	// SearchBySHA1 looks up artifact properties by the hex-encoded SHA-1 digest of an archive.
	SearchBySHA1(sha1 string) (Properties, error)
	// SearchByArtifactID returns the groupID found for the given artifactID and version.
	SearchByArtifactID(artifactID, version string) (string, error)
}
// Parser extracts library information from JAR/WAR/EAR archives.
type Parser struct {
	// rootFilePath is the path of the top-level archive; it is used in results and error messages.
	rootFilePath string
	// offline disables every lookup through client when true.
	offline bool
	// size is the size in bytes of the top-level archive, as required by zip.NewReader.
	size int64
	// client verifies or looks up artifact coordinates when offline is false.
	client Client
}
// Option customizes a Parser; options are applied to the Parser they receive.
type Option func(*Parser)
// WithFilePath returns an Option that records filePath as the path of the root archive.
func WithFilePath(filePath string) Option {
	return func(parser *Parser) { parser.rootFilePath = filePath }
}
// WithOffline returns an Option that sets the parser's offline flag.
func WithOffline(offline bool) Option {
	return func(parser *Parser) { parser.offline = offline }
}
// WithSize returns an Option that records size as the byte size of the root archive.
func WithSize(size int64) Option {
	return func(parser *Parser) { parser.size = size }
}
// NewParser builds a Parser that uses c as its client and applies opts in the order given.
func NewParser(c Client, opts ...Option) types.Parser {
	parser := &Parser{client: c}
	for i := range opts {
		opts[i](parser)
	}
	return parser
}
// Parse reads the root archive from r and returns the libraries found in it,
// with duplicates removed, together with the dependencies reported by parseArtifact.
// Any failure is wrapped with the root file path.
func (p *Parser) Parse(r dio.ReadSeekerAt) ([]types.Library, []types.Dependency, error) {
	found, deps, parseErr := p.parseArtifact(p.rootFilePath, p.size, r)
	if parseErr == nil {
		return removeLibraryDuplicates(found), deps, nil
	}
	return nil, nil, xerrors.Errorf("unable to parse %s: %w", p.rootFilePath, parseErr)
}
// parseArtifact extracts libraries from the JAR/WAR/EAR read from r, where filePath is
// the path reported in the results and size is the archive size in bytes.
// Nested archives are parsed recursively; a nested archive that fails to parse is skipped.
//
// Every pom.properties found in the archive contributes a library. If one of them matches
// the artifactId and version derived from the file name, the collected libraries are
// returned as is. Otherwise the archive itself is identified, in order, by:
//  1. MANIFEST.MF (accepted unverified in offline mode, checked through the client otherwise),
//  2. a lookup by the SHA-1 digest of the archive,
//  3. a lookup of the groupId by the artifactId and version taken from the file name.
//
// The returned dependency slice is always nil.
func (p *Parser) parseArtifact(filePath string, size int64, r dio.ReadSeekerAt) ([]types.Library, []types.Dependency, error) {
	log.Logger.Debugw("Parsing Java artifacts...", zap.String("file", filePath))

	zr, err := zip.NewReader(r, size)
	if err != nil {
		return nil, nil, xerrors.Errorf("zip error: %w", err)
	}

	// Try to extract artifactId and version from the file name
	// e.g. spring-core-5.3.4-SNAPSHOT.jar => spring-core, 5.3.4-SNAPSHOT
	fileName := filepath.Base(filePath)
	fileProps := parseFileName(filePath)

	var libs []types.Library
	var m manifest
	var foundPomProps bool

	for _, fileInJar := range zr.File {
		switch {
		case filepath.Base(fileInJar.Name) == "pom.properties":
			props, err := parsePomProperties(fileInJar, filePath)
			if err != nil {
				return nil, nil, xerrors.Errorf("failed to parse %s: %w", fileInJar.Name, err)
			}
			libs = append(libs, props.Library())

			// Check if the pom.properties is for the original JAR/WAR/EAR
			if fileProps.ArtifactID == props.ArtifactID && fileProps.Version == props.Version {
				foundPomProps = true
			}
		case filepath.Base(fileInJar.Name) == "MANIFEST.MF":
			m, err = parseManifest(fileInJar)
			if err != nil {
				return nil, nil, xerrors.Errorf("failed to parse MANIFEST.MF: %w", err)
			}
		case isArtifact(fileInJar.Name):
			innerLibs, _, err := p.parseInnerJar(fileInJar, filePath) //TODO process inner deps
			if err != nil {
				log.Logger.Debugf("Failed to parse %s: %s", fileInJar.Name, err)
				continue
			}
			libs = append(libs, innerLibs...)
		}
	}

	// If pom.properties is found, it should be preferred than MANIFEST.MF.
	if foundPomProps {
		return libs, nil, nil
	}

	manifestProps := m.properties(filePath)
	if p.offline {
		// In offline mode, we will not check if the artifact information is correct.
		if !manifestProps.Valid() {
			log.Logger.Debugw("Unable to identify POM in offline mode", zap.String("file", fileName))
			return libs, nil, nil
		}
		return append(libs, manifestProps.Library()), nil, nil
	}

	if manifestProps.Valid() {
		// Even if MANIFEST.MF is found, the groupId and artifactId might not be valid.
		// We have to make sure that the artifact exists actually.
		ok, err := p.client.Exists(manifestProps.GroupID, manifestProps.ArtifactID)
		if err != nil {
			// The check is best effort: a failed lookup is not fatal and falls through
			// to the SHA-1 search below, but the cause is logged instead of being dropped.
			log.Logger.Debugw("Unable to verify the artifact from MANIFEST.MF",
				zap.String("file", fileName), zap.Error(err))
		}
		if ok {
			// If groupId and artifactId are valid, they will be returned.
			return append(libs, manifestProps.Library()), nil, nil
		}
	}

	// If groupId and artifactId are not found, call Maven Central's search API with SHA-1 digest.
	props, err := p.searchBySHA1(r, filePath)
	if err == nil {
		return append(libs, props.Library()), nil, nil
	} else if !xerrors.Is(err, ArtifactNotFoundErr) {
		return nil, nil, xerrors.Errorf("failed to search by SHA1: %w", err)
	}

	log.Logger.Debugw("No such POM in the central repositories", zap.String("file", fileName))

	// Return when artifactId or version from the file name are empty
	if fileProps.ArtifactID == "" || fileProps.Version == "" {
		return libs, nil, nil
	}

	// Try to search groupId by artifactId via sonatype API
	// When some artifacts have the same groupIds, it might result in false detection.
	fileProps.GroupID, err = p.client.SearchByArtifactID(fileProps.ArtifactID, fileProps.Version)
	if err == nil {
		log.Logger.Debugw("POM was determined in a heuristic way", zap.String("file", fileName),
			zap.String("artifact", fileProps.String()))
		libs = append(libs, fileProps.Library())
	} else if !xerrors.Is(err, ArtifactNotFoundErr) {
		return nil, nil, xerrors.Errorf("failed to search by artifact id: %w", err)
	}

	return libs, nil, nil
}
// parseInnerJar parses an archive nested inside another archive.
// The entry zf is extracted to a temporary file, because parsing needs random access
// to the content, and is then handed to parseArtifact. rootPath is the path of the
// enclosing archive; the nested entry is reported as rootPath joined with the entry name.
// The temporary file is removed before returning.
func (p *Parser) parseInnerJar(zf *zip.File, rootPath string) ([]types.Library, []types.Dependency, error) {
	fr, err := zf.Open()
	if err != nil {
		return nil, nil, xerrors.Errorf("unable to open %s: %w", zf.Name, err)
	}
	// Release the entry reader on every return path.
	defer fr.Close()

	f, err := os.CreateTemp("", "inner")
	if err != nil {
		return nil, nil, xerrors.Errorf("unable to create a temp file: %w", err)
	}
	defer func() {
		// Cleanup is best effort; there is nothing useful to do if it fails.
		_ = f.Close()
		_ = os.Remove(f.Name())
	}()

	// Copy the file content to the temp file
	if _, err = io.Copy(f, fr); err != nil {
		return nil, nil, xerrors.Errorf("file copy error: %w", err)
	}

	// build full path to inner jar
	fullPath := path.Join(rootPath, zf.Name)

	// Parse jar/war/ear recursively
	innerLibs, innerDeps, err := p.parseArtifact(fullPath, int64(zf.UncompressedSize64), f)
	if err != nil {
		return nil, nil, xerrors.Errorf("failed to parse %s: %w", zf.Name, err)
	}

	return innerLibs, innerDeps, nil
}
// searchBySHA1 hashes the whole content of r with SHA-1 and asks the client for the
// artifact that has this digest. On success the returned properties carry filePath.
// An error from the client is returned unwrapped.
func (p *Parser) searchBySHA1(r io.ReadSeeker, filePath string) (Properties, error) {
	var none Properties

	// Rewind first so that the digest covers the file from its beginning.
	_, seekErr := r.Seek(0, io.SeekStart)
	if seekErr != nil {
		return none, xerrors.Errorf("file seek error: %w", seekErr)
	}

	hasher := sha1.New()
	if _, copyErr := io.Copy(hasher, r); copyErr != nil {
		return none, xerrors.Errorf("unable to calculate SHA-1: %w", copyErr)
	}
	digest := hex.EncodeToString(hasher.Sum(nil))

	found, err := p.client.SearchBySHA1(digest)
	if err != nil {
		return none, err
	}

	found.FilePath = filePath
	return found, nil
}
// isArtifact reports whether name has a .jar, .ear or .war extension.
// The comparison is case-sensitive.
func isArtifact(name string) bool {
	switch filepath.Ext(name) {
	case ".jar", ".ear", ".war":
		return true
	default:
		return false
	}
}
// parseFileName derives the artifactId and version from the base name of filePath
// using jarFileRegEx. When the name does not match, empty Properties are returned.
func parseFileName(filePath string) Properties {
	matches := jarFileRegEx.FindStringSubmatch(filepath.Base(filePath))
	if len(matches) == 3 {
		return Properties{
			ArtifactID: matches[1],
			Version:    matches[2],
			FilePath:   filePath,
		}
	}
	return Properties{}
}
// parsePomProperties reads the groupId, artifactId and version entries of the
// pom.properties entry f. filePath is stored in the result as is.
// Lines are trimmed before matching; a key must be immediately followed by "=".
// When a key occurs more than once, the last occurrence wins.
func parsePomProperties(f *zip.File, filePath string) (Properties, error) {
	rc, err := f.Open()
	if err != nil {
		return Properties{}, xerrors.Errorf("unable to open pom.properties: %w", err)
	}
	defer rc.Close()

	props := Properties{FilePath: filePath}

	sc := bufio.NewScanner(rc)
	for sc.Scan() {
		key, value, found := strings.Cut(strings.TrimSpace(sc.Text()), "=")
		if !found {
			continue
		}
		switch key {
		case "groupId":
			props.GroupID = value
		case "artifactId":
			props.ArtifactID = value
		case "version":
			props.Version = value
		}
	}
	if err = sc.Err(); err != nil {
		return Properties{}, xerrors.Errorf("scan error: %w", err)
	}

	return props, nil
}
// manifest holds the MANIFEST.MF attributes that are used to guess Maven coordinates.
// Each field is named after the manifest header it is read from
// (e.g. implementationVersion for "Implementation-Version").
type manifest struct {
	// Implementation-* headers.
	implementationVersion  string
	implementationTitle    string
	implementationVendor   string
	implementationVendorId string
	// Specification-* headers.
	specificationTitle   string
	specificationVersion string
	specificationVendor  string
	// Bundle-* headers.
	bundleName         string
	bundleVersion      string
	bundleSymbolicName string
}
// parseManifest reads the Implementation-*, Specification-* and Bundle-* headers
// of the MANIFEST.MF entry f. Lines are matched as they appear in the file, so a
// header must start at the beginning of the line. Values are stored exactly as they
// follow the colon, including any leading whitespace.
func parseManifest(f *zip.File) (manifest, error) {
	rc, err := f.Open()
	if err != nil {
		return manifest{}, xerrors.Errorf("unable to open MANIFEST.MF: %w", err)
	}
	defer rc.Close()

	var parsed manifest
	sc := bufio.NewScanner(rc)
	for sc.Scan() {
		line := sc.Text()

		// Ignore lines with fewer than two whitespace-separated fields,
		// and skip variables. e.g. Bundle-Name: %bundleName
		fields := strings.Fields(line)
		if len(fields) < 2 || strings.HasPrefix(fields[1], "%") {
			continue
		}

		header, value, found := strings.Cut(line, ":")
		if !found {
			continue
		}

		// It is not determined which fields are present in each application.
		// In some cases, none of them are included, in which case they cannot be detected.
		switch header {
		case "Implementation-Version":
			parsed.implementationVersion = value
		case "Implementation-Title":
			parsed.implementationTitle = value
		case "Implementation-Vendor":
			parsed.implementationVendor = value
		case "Implementation-Vendor-Id":
			parsed.implementationVendorId = value
		case "Specification-Version":
			parsed.specificationVersion = value
		case "Specification-Title":
			parsed.specificationTitle = value
		case "Specification-Vendor":
			parsed.specificationVendor = value
		case "Bundle-Version":
			parsed.bundleVersion = value
		case "Bundle-Name":
			parsed.bundleName = value
		case "Bundle-SymbolicName":
			parsed.bundleSymbolicName = value
		}
	}
	if err = sc.Err(); err != nil {
		return manifest{}, xerrors.Errorf("scan error: %w", err)
	}

	return parsed, nil
}
// properties converts the manifest into Properties for the archive at filePath.
// If the groupId, artifactId or version cannot be determined, empty Properties
// are returned.
func (m manifest) properties(filePath string) Properties {
	var err error
	props := Properties{FilePath: filePath}

	if props.GroupID, err = m.determineGroupID(); err != nil {
		return Properties{}
	}
	if props.ArtifactID, err = m.determineArtifactID(); err != nil {
		return Properties{}
	}
	if props.Version, err = m.determineVersion(); err != nil {
		return Properties{}
	}
	return props
}
// determineGroupID picks the groupId from the first non-empty field among
// implementationVendorId, bundleSymbolicName, implementationVendor and
// specificationVendor, in that order. The result is trimmed of surrounding
// whitespace. An error is returned when all of them are empty.
func (m manifest) determineGroupID() (string, error) {
	if vendorID := m.implementationVendorId; vendorID != "" {
		return strings.TrimSpace(vendorID), nil
	}

	if symbolic := m.bundleSymbolicName; symbolic != "" {
		// Drop the last dot-separated segment when there is one.
		// e.g. "com.fasterxml.jackson.core.jackson-databind" => "com.fasterxml.jackson.core"
		if dot := strings.LastIndex(symbolic, "."); dot > 0 {
			symbolic = symbolic[:dot]
		}
		return strings.TrimSpace(symbolic), nil
	}

	if vendor := m.implementationVendor; vendor != "" {
		return strings.TrimSpace(vendor), nil
	}

	if vendor := m.specificationVendor; vendor != "" {
		return strings.TrimSpace(vendor), nil
	}

	return "", xerrors.New("no groupID found")
}
// determineArtifactID returns the first non-empty value among implementationTitle,
// specificationTitle and bundleName, trimmed of surrounding whitespace.
// An error is returned when all of them are empty.
func (m manifest) determineArtifactID() (string, error) {
	// Candidates in order of preference.
	candidates := [...]string{m.implementationTitle, m.specificationTitle, m.bundleName}
	for _, candidate := range candidates {
		if candidate != "" {
			return strings.TrimSpace(candidate), nil
		}
	}
	return "", xerrors.New("no artifactID found")
}
// determineVersion returns the first non-empty value among implementationVersion,
// specificationVersion and bundleVersion, trimmed of surrounding whitespace.
// An error is returned when all of them are empty.
func (m manifest) determineVersion() (string, error) {
	// Candidates in order of preference.
	candidates := [...]string{m.implementationVersion, m.specificationVersion, m.bundleVersion}
	for _, candidate := range candidates {
		if candidate != "" {
			return strings.TrimSpace(candidate), nil
		}
	}
	return "", xerrors.New("no version found")
}
// removeLibraryDuplicates keeps one library per distinct combination of name,
// version and file path, as selected by lo.UniqBy.
func removeLibraryDuplicates(libs []types.Library) []types.Library {
	identity := func(l types.Library) string {
		return fmt.Sprintf("%s::%s::%s", l.Name, l.Version, l.FilePath)
	}
	return lo.UniqBy(libs, identity)
}