mirror of https://github.com/misterzym/jdocset.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
203 lines
5.5 KiB
203 lines
5.5 KiB
package main |
|
|
|
import ( |
|
"os" |
|
"github.com/yhat/scrape" |
|
"strings" |
|
"golang.org/x/net/html" |
|
"golang.org/x/net/html/atom" |
|
) |
|
|
|
type Verifier func(string) bool |
|
type TypeEvaluator func(Verifier, Verifier) bool |
|
type IndexEntry struct { |
|
name string |
|
elementType ElementType |
|
path string |
|
} |
|
|
|
var ALL_ELEMENT_TYPES = []ElementType{ |
|
Class, |
|
Method, |
|
Field, |
|
Constructor, |
|
Interface, |
|
Exception, |
|
Error, |
|
Enum, |
|
Trait, |
|
Notation, |
|
Package, |
|
} |
|
|
|
var ELEMENT_TYPE_TO_TYPE_EVALUATORS = map[ElementType][]TypeEvaluator{ |
|
Class: NewTypeEvaluators(isClass), |
|
Method: NewTypeEvaluators(isStaticMethod, isMethod), |
|
Field: NewTypeEvaluators(isStaticField, isField), |
|
Constructor: NewTypeEvaluators(isConstructor), |
|
Interface: NewTypeEvaluators(isInterface), |
|
Exception: NewTypeEvaluators(isException), |
|
Error: NewTypeEvaluators(isError), |
|
Enum: NewTypeEvaluators(isEnum), |
|
Trait: NewTypeEvaluators(isTrait), |
|
Notation: NewTypeEvaluators(isNotation), |
|
Package: NewTypeEvaluators(isPackage), |
|
} |
|
|
|
func parseIndex(indexFilePath string, entryHandler func(IndexEntry)) { |
|
|
|
log.Info("Indexing from file", "file", indexFilePath) |
|
|
|
indexed := 0 |
|
file, err := os.OpenFile(indexFilePath, os.O_RDONLY, 0666) |
|
|
|
if err != nil { |
|
log.Error("Unable to open file", "file", indexFilePath, "error", err) |
|
return |
|
} |
|
|
|
root, err := html.Parse(file) |
|
|
|
if err != nil { |
|
log.Error("Unable to parse index", "file", file, "error", err) |
|
return |
|
} |
|
|
|
anchorTags := scrape.FindAll(root, scrape.ByTag(atom.A)) |
|
|
|
for _, tag := range anchorTags { |
|
var parentTag = tag.Parent |
|
|
|
if parentTag.FirstChild != tag { |
|
continue |
|
} |
|
|
|
isParentSpan := parentTag.DataAtom == atom.Span |
|
isParentCode := parentTag.DataAtom == atom.Code |
|
isParentItalic := parentTag.DataAtom == atom.I |
|
isParentBold := parentTag.DataAtom == atom.B |
|
|
|
if isParentSpan || isParentCode || isParentItalic || isParentBold { |
|
parentTag = parentTag.Parent |
|
if parentTag.FirstChild != tag.Parent { |
|
continue |
|
} |
|
} |
|
|
|
if parentTag.DataAtom != atom.Dt { |
|
continue |
|
} |
|
|
|
text := scrape.Text(parentTag) |
|
var tagType ElementType = NotFound |
|
var dtClassName = scrape.Attr(parentTag, "class") |
|
|
|
lowercaseText := strings.ToLower(text) |
|
|
|
textContainsInsensitive := func(s string) bool { |
|
return strings.Contains(lowercaseText, s) |
|
} |
|
|
|
dtClassNameHasSuffix := func(s string) bool { |
|
return strings.HasSuffix(dtClassName, s) |
|
} |
|
|
|
tagTypeDetermined := false |
|
|
|
for _, elementType := range ALL_ELEMENT_TYPES { |
|
|
|
typeEvaluators := ELEMENT_TYPE_TO_TYPE_EVALUATORS[elementType] |
|
|
|
for _, evaluator := range typeEvaluators { |
|
|
|
if evaluator(textContainsInsensitive, dtClassNameHasSuffix) { |
|
tagType = elementType |
|
tagTypeDetermined = true |
|
break |
|
} |
|
} |
|
|
|
if tagTypeDetermined { |
|
break |
|
} |
|
} |
|
|
|
if tagType == NotFound { |
|
log.Warn("Warning: could not determine type", "text", text, "dtClassName", dtClassName) |
|
continue |
|
} |
|
|
|
name := scrape.Text(tag) |
|
path := scrape.Attr(tag, "href") |
|
|
|
entryHandler(IndexEntry{name: name, elementType: tagType, path: path}) |
|
|
|
indexed++ |
|
} |
|
|
|
log.Info("Indexed", "count", indexed) |
|
} |
|
|
|
func NewTypeEvaluators(a TypeEvaluator, others ...TypeEvaluator) []TypeEvaluator { |
|
|
|
typeEvaluators := make([]TypeEvaluator, 1 + len(others)) |
|
|
|
typeEvaluators[0] = a |
|
for i, typeEvaluator := range others { |
|
typeEvaluators[i + 1] = typeEvaluator |
|
} |
|
|
|
return typeEvaluators |
|
} |
|
|
|
func isClass(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("class in") || textContainsInsensitive("- class") || dtClassNameHasSuffix("class") |
|
} |
|
|
|
func isStaticMethod(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("static method in") || dtClassNameHasSuffix("method") |
|
} |
|
|
|
func isStaticField(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("static variable in") || textContainsInsensitive("field in") || dtClassNameHasSuffix("field") |
|
} |
|
|
|
func isConstructor(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("constructor") || dtClassNameHasSuffix("constructor") |
|
} |
|
|
|
func isMethod(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("method in") |
|
} |
|
|
|
func isField(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("variable in") |
|
} |
|
|
|
func isInterface(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("interface in") || textContainsInsensitive("- interface") || dtClassNameHasSuffix("interface") |
|
} |
|
|
|
func isException(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("exception in") || textContainsInsensitive("- exception") || dtClassNameHasSuffix("exception") |
|
} |
|
|
|
func isError(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("error in") || textContainsInsensitive("- error") || dtClassNameHasSuffix("error") |
|
} |
|
|
|
func isEnum(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("enum in") || textContainsInsensitive("- enum") || dtClassNameHasSuffix("enum") |
|
} |
|
|
|
func isTrait(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("trait in") |
|
} |
|
|
|
func isNotation(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("annotation type") || dtClassNameHasSuffix("annotation") |
|
} |
|
|
|
func isPackage(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool { |
|
return textContainsInsensitive("package") || dtClassNameHasSuffix("package") |
|
}
|
|
|