init
This commit is contained in:
		
							
								
								
									
										59
									
								
								extractor/util/util.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								extractor/util/util.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,59 @@
 | 
			
		||||
package util
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"golang.org/x/net/html"
 | 
			
		||||
 | 
			
		||||
	"net/http"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Retrieve JavaScript embedded in HTML
 | 
			
		||||
func GetHTMLScriptFunc(url string, readCodeLineByLine bool, codeFunc func(code string) bool) error {
 | 
			
		||||
	resp, err := http.Get(url)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	defer resp.Body.Close()
 | 
			
		||||
 | 
			
		||||
	z := html.NewTokenizer(resp.Body)
 | 
			
		||||
	isScript := false
 | 
			
		||||
 | 
			
		||||
	for {
 | 
			
		||||
		tt := z.Next()
 | 
			
		||||
 | 
			
		||||
		switch tt {
 | 
			
		||||
		case html.ErrorToken:
 | 
			
		||||
			return z.Err()
 | 
			
		||||
		case html.TextToken:
 | 
			
		||||
			if codeFunc != nil && isScript {
 | 
			
		||||
				t := string(z.Text())
 | 
			
		||||
				if readCodeLineByLine {
 | 
			
		||||
					// NOTE: a bufio line scanner doesn't work (bufio.Scanner: token too long); maybe this is a bug
 | 
			
		||||
					// Iterate over each line in the script
 | 
			
		||||
					ls := 0 // line start
 | 
			
		||||
					le := 0 // line end
 | 
			
		||||
					for ls < len(t) {
 | 
			
		||||
						if le == len(t) || t[le] == '\n' {
 | 
			
		||||
							ln := t[ls:le]
 | 
			
		||||
 | 
			
		||||
							if !codeFunc(ln) {
 | 
			
		||||
								return nil
 | 
			
		||||
							}
 | 
			
		||||
 | 
			
		||||
							ls = le + 1
 | 
			
		||||
						}
 | 
			
		||||
						le++
 | 
			
		||||
					}
 | 
			
		||||
				} else {
 | 
			
		||||
					if !codeFunc(t) {
 | 
			
		||||
						return nil
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		case html.StartTagToken, html.EndTagToken:
 | 
			
		||||
			tn, _ := z.TagName()
 | 
			
		||||
			if string(tn) == "script" {
 | 
			
		||||
				isScript = tt == html.StartTagToken
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user