Browse Source

Initial commit

master
William Heng 8 years ago
commit
9e865cbe6c
  1. 106
      .gitignore
  2. 36
      elementType.go
  3. 356
      main.go
  4. 203
      parser.go

106
.gitignore vendored

@ -0,0 +1,106 @@
# Created by https://www.gitignore.io/api/go,macos,intellij+iml
### Go ###
# Binaries for programs and plugins
*.exe
*.dll
*.so
*.dylib
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
.glide/
### Intellij+iml ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
/out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
### Intellij+iml Patch ###
# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
*.iml
modules.xml
.idea/misc.xml
*.ipr
### macOS ###
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# End of https://www.gitignore.io/api/go,macos,intellij+iml

36
elementType.go

@ -0,0 +1,36 @@
package main
// ElementType identifies the kind of Javadoc element an index entry refers to.
type ElementType int

// Element kinds, numbered from zero with iota. NotFound is the zero value and
// is the only kind that has no entry in VALUES.
const (
	NotFound ElementType = iota
	Class
	Method
	Field
	Constructor
	Interface
	Exception
	Error
	Enum
	Trait
	Notation
	Package
)

// VALUES maps every element kind except NotFound to its textual name.
var VALUES = map[ElementType]string{
	Class:       "Class",
	Method:      "Method",
	Field:       "Field",
	Constructor: "Constructor",
	Interface:   "Interface",
	Exception:   "Exception",
	Error:       "Error",
	Enum:        "Enum",
	Trait:       "Trait",
	Notation:    "Notation",
	Package:     "Package",
}

// value returns the textual name of e as listed in VALUES, or the empty
// string when e has no entry there (NotFound or an out-of-range value).
//
// The receiver is a value rather than a pointer: ElementType is a plain int,
// and a value receiver lets the method be called on constants and other
// non-addressable values while still working through a pointer.
func (e ElementType) value() string {
	return VALUES[e]
}

356
main.go

@ -0,0 +1,356 @@
package main
import (
"os"
"path"
"github.com/inconshreveable/log15"
"path/filepath"
"errors"
"strings"
"io"
"database/sql"
_ "github.com/mattn/go-sqlite3"
"fmt"
)
// OVERVIEW_SUMMARY is the file name main looks for inside the Javadoc folder;
// when it is found it becomes the docset's index page.
const OVERVIEW_SUMMARY = "overview-summary.html"
// log is the package-wide log15 logger used for all progress and error output.
var log = log15.New()
// toIndex collects the paths of the copied Javadoc index pages that main
// hands to index() for parsing.
var toIndex []string
// main converts a Javadoc API folder into a docset directory.
//
// Usage: javadocset <docset name> <javadoc API folder>
//
// It performs these steps, exiting with status 1 on any failure:
//  1. validate the command line ("--help" prints the usage and returns);
//  2. recreate "<docset name>.docset/Contents/Resources/Documents";
//  3. locate the Javadoc root (the folder holding overview-summary.html,
//     searching at most 10000 entries below the given folder);
//  4. copy the Javadoc files into the Documents folder;
//  5. collect the index pages (index-all.html, or index-files/index-*.html);
//  6. write Info.plist and build the SQLite search index.
func main() {
	arguments := os.Args
	switch {
	case len(arguments) == 2 && arguments[1] == "--help":
		printUsage()
		return
	case len(arguments) != 3:
		log.Error("Invalid argument(s) provided")
		printUsage()
		os.Exit(1)
	}

	docsetName := path.Clean(arguments[1])
	javadocPath := path.Clean(arguments[2])
	log.Info("Running with arguments", "docsetName", docsetName, "javadocPath", javadocPath)

	docsetDirectoryPath := docsetName + ".docset"
	if exists, _ := pathExists(docsetDirectoryPath); exists {
		log.Info("Removing existing docset directory", "Docset directory path", docsetDirectoryPath)
		if err := os.RemoveAll(docsetDirectoryPath); err != nil {
			log.Error(
				"Unable to remove existing docset directory",
				"Docset directory path", docsetDirectoryPath,
				"error", err,
			)
			os.Exit(1)
		}
	}

	contentsDirectoryPath := filepath.Join(docsetDirectoryPath, "Contents")
	resourcesDirectoryPath := filepath.Join(contentsDirectoryPath, "Resources")
	documentsDirectoryPath := filepath.Join(resourcesDirectoryPath, "Documents")
	log.Info("Creating docset folder structure...")
	if err := os.MkdirAll(documentsDirectoryPath, os.ModePerm); err != nil {
		log.Error(
			"Unable to create docset folder structure",
			"Docset directory", docsetDirectoryPath,
			"error", err,
		)
		os.Exit(1)
	}
	log.Info("Done!")

	// Locate the Javadoc root. The cleaned path has no trailing separator, so
	// the file name must be joined rather than concatenated.
	var docsetIndexFile string
	summaryFound := false
	if exists, _ := pathExists(filepath.Join(javadocPath, OVERVIEW_SUMMARY)); exists {
		summaryFound = true
	} else {
		walkCount := 0
		walkErr := filepath.Walk(javadocPath, func(filePath string, info os.FileInfo, err error) error {
			if err != nil {
				log.Error("Failed to walk path", "path", filePath, "err", err)
				os.Exit(1)
			}
			walkCount++
			if walkCount >= 10000 {
				return errors.New("Hit file enumeration limit")
			}
			if info.Name() == OVERVIEW_SUMMARY {
				// Re-root the Javadoc folder at the directory holding the summary.
				javadocPath = filepath.Dir(filePath)
				summaryFound = true
			}
			return nil
		})
		if walkErr != nil {
			// Not fatal: the index pages may still be found at the given path.
			log.Warn("Stopped searching for the overview summary", "error", walkErr)
		}
	}
	if summaryFound {
		docsetIndexFile = OVERVIEW_SUMMARY
	}

	hasMultipleIndices := false
	if exists, _ := pathExists(filepath.Join(javadocPath, "index-files")); exists {
		if docsetIndexFile == "" {
			docsetIndexFile = "index-files/index-1.html"
		}
		hasMultipleIndices = true
	}

	copyFiles(documentsDirectoryPath, javadocPath)

	// Collect the index pages from the copied tree. A split index
	// (index-files/) takes precedence over the single index-all.html.
	documentsDirectoryIndex := filepath.Join(documentsDirectoryPath, "index-all.html")
	copiedIndexFilesPath := filepath.Join(documentsDirectoryPath, "index-files")
	singleIndexExists, _ := pathExists(documentsDirectoryIndex)
	splitIndexExists, _ := pathExists(copiedIndexFilesPath)
	switch {
	case !hasMultipleIndices && singleIndexExists:
		toIndex = append(toIndex, documentsDirectoryIndex)
		if docsetIndexFile == "" {
			docsetIndexFile = "index-all.html"
		}
	case splitIndexExists:
		walkErr := filepath.Walk(copiedIndexFilesPath, func(filePath string, info os.FileInfo, err error) error {
			if err != nil {
				log.Error("Failed to walk path", "filePath", filePath, "err", err)
				os.Exit(1)
			}
			filename := info.Name()
			if !info.IsDir() && strings.HasPrefix(filename, "index-") && strings.HasSuffix(filename, ".html") {
				toIndex = append(toIndex, filePath)
			}
			return nil
		})
		if walkErr != nil {
			log.Error("Failed to walk path", "filePath", copiedIndexFilesPath, "err", walkErr)
			os.Exit(1)
		}
	}

	if len(toIndex) == 0 {
		log.Error("API folder specified does not contain any index files (either an 'index-all.html' file or an 'index-files' folder) and is not valid")
		printUsage()
		// This is a failure, so report it through the exit status as well.
		os.Exit(1)
	}

	writeInfoPlist(docsetName, docsetIndexFile, contentsDirectoryPath)
	initDB(resourcesDirectoryPath, index(toIndex))
}
// printUsage logs the command-line synopsis followed by one line describing
// each of the two arguments.
func printUsage() {
	usageLines := [...]string{
		"Usage: javadocset <docset name> <javadoc API folder>",
		"<docset name> - anything you want",
		"<javadoc API folder> - the path of the javadoc API folder you want to index",
	}
	for _, line := range usageLines {
		log.Info(line)
	}
}
// copyFiles recursively copies the contents of javadocPath into
// documentsDirectoryPath, recreating the directory structure below the source.
//
// Each entry's destination is derived from its path relative to the source
// root (filepath.Rel), so directory names that repeat the source's base name,
// or a source of ".", are handled correctly. If the destination directory lies
// inside the source tree it is skipped so the copy does not recurse into its
// own output.
//
// Any failure is logged and terminates the program with status 1.
func copyFiles(documentsDirectoryPath, javadocPath string) {
	log.Info("Copying files...", "source", javadocPath, "destination", documentsDirectoryPath)
	src := filepath.Clean(javadocPath)
	dst := filepath.Clean(documentsDirectoryPath)

	// The error is deliberately ignored: a destination that cannot be stat'ed
	// (for example because it does not exist yet) cannot be part of the source.
	dstInfo, _ := os.Stat(dst)

	walkErr := filepath.Walk(src, func(filePath string, info os.FileInfo, err error) error {
		if err != nil {
			log.Error("Error walking path", "filePath", filePath, "error", err)
			os.Exit(1)
		}
		if info.IsDir() && dstInfo != nil && os.SameFile(info, dstInfo) {
			return filepath.SkipDir
		}

		relPath, err := filepath.Rel(src, filePath)
		if err != nil {
			log.Error("Unable to resolve path relative to source", "filePath", filePath, "error", err)
			os.Exit(1)
		}
		dstPath := filepath.Join(dst, relPath)

		if info.IsDir() {
			if err := os.MkdirAll(dstPath, os.ModePerm); err != nil {
				log.Error("Unable to create directory", "directory", dstPath, "error", err)
				os.Exit(1)
			}
			return nil
		}

		if err := copyFileContents(filePath, dstPath); err != nil {
			log.Error("Unable to copy file", "src", filePath, "dst", dstPath, "error", err)
			os.Exit(1)
		}
		return nil
	})
	if walkErr != nil {
		log.Error("Error walking path", "filePath", src, "error", walkErr)
		os.Exit(1)
	}
	log.Info("Done!")
}
// writeInfoPlist renders the docset's Info.plist and writes it to
// "<contentsDirectoryPath>/Info.plist".
//
// The bundle identifier and platform family are both the lower-cased first
// word of docsetName; docsetIndexFile becomes dashIndexFilePath. Every
// interpolated value is XML-escaped so that names containing '&', '<' or '>'
// still produce a well-formed document.
//
// A write failure is logged (with its cause) but does not stop the program.
func writeInfoPlist(docsetName, docsetIndexFile, contentsDirectoryPath string) {
	const plistContentTemplate = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><plist version=\"1.0\"><dict><key>CFBundleIdentifier</key> <string>%v</string><key>CFBundleName</key> <string>%v</string> <key>DocSetPlatformFamily</key> <string>%v</string> <key>dashIndexFilePath</key><string>%v</string><key>DashDocSetFamily</key><string>java</string><key>isDashDocset</key><true/></dict></plist>"

	// A Replacer scans the input once, so the "&" it emits is never re-escaped.
	escape := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;").Replace

	docsetIdentifier := escape(firstPhraseLowerCased(docsetName))
	plistContent := fmt.Sprintf(
		plistContentTemplate,
		docsetIdentifier,
		escape(docsetName),
		docsetIdentifier,
		escape(docsetIndexFile),
	)

	infoPlistPath := contentsDirectoryPath + "/Info.plist"
	if err := writeStringToFile(plistContent, infoPlistPath); err != nil {
		log.Error("Unable to write to plist file", "plistPath", infoPlistPath, "error", err)
	}
}
// initDB creates a fresh SQLite database at
// "<resourcesDirectoryPath>/docset.dsidx", creates the searchIndex table in
// it and, when dbFunc is non-nil, passes the open database to dbFunc.
//
// Failing to open the database or to create the table is logged and ends the
// program with status 1.
func initDB(resourcesDirectoryPath string, dbFunc func(*sql.DB)) {
	indexPath := filepath.Clean(resourcesDirectoryPath + "/docset.dsidx")

	// Best effort: drop any previous index; the result is intentionally unused.
	os.Remove(indexPath)

	database, openErr := sql.Open("sqlite3", indexPath)
	if openErr != nil {
		log.Error("Unable to create sqlite database", "destination", indexPath, "error", openErr)
		os.Exit(1)
	}
	defer database.Close()

	const createTable = "CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT)"
	if _, execErr := database.Exec(createTable); execErr != nil {
		log.Error("Unable to create table", "error", execErr)
		os.Exit(1)
	}

	if dbFunc == nil {
		return
	}
	dbFunc(database)
}
// index returns a function that fills the searchIndex table of the database
// it is given with the entries parsed from every file in indicesToIndex.
//
// All inserts run inside one transaction through a single prepared statement.
// An entry is inserted only once per distinct (name, type, path) triple.
// Failing to begin the transaction, prepare the statement, insert a row or
// commit is logged and ends the program with status 1.
func index(indicesToIndex []string) func(db *sql.DB) {
	// entryKey identifies an inserted row. A struct key keeps the three fields
	// separate, so distinct triples can never collide the way concatenated
	// strings can.
	type entryKey struct {
		name, elementType, path string
	}

	return func(db *sql.DB) {
		tx, err := db.Begin()
		if err != nil {
			log.Error("Unable to begin transactions for database", "error", err)
			os.Exit(1)
		}
		stmt, err := tx.Prepare("INSERT INTO searchIndex(name, type, path) VALUES (?, ?, ?)")
		if err != nil {
			log.Error("Unable to create statement to insert into database", "error", err)
			os.Exit(1)
		}
		defer stmt.Close()

		added := make(map[entryKey]struct{})
		for _, indexFile := range indicesToIndex {
			parseIndex(indexFile, func(entry IndexEntry) {
				key := entryKey{
					name:        entry.name,
					elementType: entry.elementType.value(),
					path:        entry.path,
				}
				if _, seen := added[key]; seen {
					return
				}
				if _, err := stmt.Exec(key.name, key.elementType, key.path); err != nil {
					log.Error(
						"Unable to insert entry",
						"name", key.name,
						"elementType", key.elementType,
						"path", key.path,
						"error", err,
					)
					os.Exit(1)
				}
				added[key] = struct{}{}
			})
		}

		// An unchecked commit failure would silently leave the index empty.
		if err := tx.Commit(); err != nil {
			log.Error("Unable to commit entries to database", "error", err)
			os.Exit(1)
		}
	}
}
/**
Utility functions
*/
// pathExists reports whether path can be stat'ed.
//
// It returns (true, nil) when os.Stat succeeds and (false, nil) when the path
// does not exist. For any other Stat failure it returns true together with
// that error.
func pathExists(path string) (bool, error) {
	switch _, statErr := os.Stat(path); {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return true, statErr
	}
}
// writeStringToFile creates (or truncates) the file at dst and writes content
// to it.
//
// It returns the first error from creating, writing or closing the file. The
// close error is reported because a failed close can mean the data never
// reached the file.
func writeStringToFile(content, dst string) (err error) {
	file, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// Keep an earlier write error; otherwise surface the close error.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = file.WriteString(content)
	return err
}
// copyFileContents copies the file at src to dst, creating or truncating dst,
// and calls Sync on dst once the copy has finished.
//
// It returns the first error encountered. A copy failure is also logged. If
// nothing failed earlier, the error from closing dst is returned.
func copyFileContents(src, dst string) (err error) {
	source, err := os.Open(src)
	if err != nil {
		return err
	}
	defer source.Close()

	target, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// Only let the close error through when nothing failed before it.
		if closeErr := target.Close(); err == nil {
			err = closeErr
		}
	}()

	if _, err = io.Copy(target, source); err != nil {
		log.Error("Error copying", "error", err)
		return err
	}
	return target.Sync()
}
// firstPhraseLowerCased returns the part of s that precedes its first space
// (all of s when it contains no space), converted to lower case.
func firstPhraseLowerCased(s string) string {
	firstPhrase := s
	if spaceAt := strings.Index(s, " "); spaceAt >= 0 {
		firstPhrase = s[:spaceAt]
	}
	return strings.ToLower(firstPhrase)
}

203
parser.go

@ -0,0 +1,203 @@
package main
import (
"os"
"github.com/yhat/scrape"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// Verifier reports whether the string it is given matches some property of
// the index entry currently being classified.
type Verifier func(string) bool
// TypeEvaluator decides whether an index entry belongs to one element kind.
// parseIndex calls it with two Verifiers, in this order: a substring check
// against the lower-cased text of the entry's <dt> element, and a suffix
// check against that element's class attribute.
type TypeEvaluator func(Verifier, Verifier) bool
// IndexEntry is one element found in a Javadoc index page.
type IndexEntry struct {
// name is the text of the entry's anchor tag.
name string
// elementType is the kind determined for the entry.
elementType ElementType
// path is the anchor's href attribute.
path string
}
// ALL_ELEMENT_TYPES lists every element kind except NotFound. parseIndex
// tries the kinds in this order and stops at the first one with a matching
// evaluator, so the order determines which kind wins when several match.
var ALL_ELEMENT_TYPES = []ElementType{
Class,
Method,
Field,
Constructor,
Interface,
Exception,
Error,
Enum,
Trait,
Notation,
Package,
}
// ELEMENT_TYPE_TO_TYPE_EVALUATORS maps each element kind to the evaluators
// that recognise it. parseIndex tries a kind's evaluators in the listed order
// and assigns the kind as soon as one of them returns true.
var ELEMENT_TYPE_TO_TYPE_EVALUATORS = map[ElementType][]TypeEvaluator{
Class: NewTypeEvaluators(isClass),
Method: NewTypeEvaluators(isStaticMethod, isMethod),
Field: NewTypeEvaluators(isStaticField, isField),
Constructor: NewTypeEvaluators(isConstructor),
Interface: NewTypeEvaluators(isInterface),
Exception: NewTypeEvaluators(isException),
Error: NewTypeEvaluators(isError),
Enum: NewTypeEvaluators(isEnum),
Trait: NewTypeEvaluators(isTrait),
Notation: NewTypeEvaluators(isNotation),
Package: NewTypeEvaluators(isPackage),
}
// parseIndex parses the Javadoc index page at indexFilePath and calls
// entryHandler once for every entry whose element kind can be determined.
//
// An entry is an <a> tag that is the first child of a <dt> element, either
// directly or wrapped in a single <span>, <code>, <i> or <b> that is itself
// the first child of the <dt>. The entry's name is the anchor text and its
// path is the anchor's href attribute. Entries whose kind cannot be
// determined are logged as warnings and skipped.
//
// Failing to open or parse the file is logged and parseIndex returns without
// calling entryHandler.
func parseIndex(indexFilePath string, entryHandler func(IndexEntry)) {
	log.Info("Indexing from file", "file", indexFilePath)

	file, err := os.Open(indexFilePath)
	if err != nil {
		log.Error("Unable to open file", "file", indexFilePath, "error", err)
		return
	}
	// The file is only read, so the close error carries no information.
	defer file.Close()

	root, err := html.Parse(file)
	if err != nil {
		log.Error("Unable to parse index", "file", indexFilePath, "error", err)
		return
	}

	indexed := 0
	for _, tag := range scrape.FindAll(root, scrape.ByTag(atom.A)) {
		parentTag := tag.Parent
		if parentTag == nil || parentTag.FirstChild != tag {
			continue
		}

		// Step over a single inline wrapper between the anchor and its <dt>.
		switch parentTag.DataAtom {
		case atom.Span, atom.Code, atom.I, atom.B:
			wrapper := parentTag
			parentTag = wrapper.Parent
			if parentTag == nil || parentTag.FirstChild != wrapper {
				continue
			}
		}
		if parentTag.DataAtom != atom.Dt {
			continue
		}

		text := scrape.Text(parentTag)
		dtClassName := scrape.Attr(parentTag, "class")
		tagType := classifyIndexEntry(text, dtClassName)
		if tagType == NotFound {
			log.Warn("Warning: could not determine type", "text", text, "dtClassName", dtClassName)
			continue
		}

		entryHandler(IndexEntry{
			name:        scrape.Text(tag),
			elementType: tagType,
			path:        scrape.Attr(tag, "href"),
		})
		indexed++
	}
	log.Info("Indexed", "count", indexed)
}

// classifyIndexEntry returns the first kind in ALL_ELEMENT_TYPES that has an
// evaluator accepting the entry, or NotFound when none does. Evaluators get a
// substring check on the lower-cased text and a suffix check on dtClassName.
func classifyIndexEntry(text, dtClassName string) ElementType {
	lowercaseText := strings.ToLower(text)
	textContainsInsensitive := func(s string) bool {
		return strings.Contains(lowercaseText, s)
	}
	dtClassNameHasSuffix := func(s string) bool {
		return strings.HasSuffix(dtClassName, s)
	}

	for _, elementType := range ALL_ELEMENT_TYPES {
		for _, evaluator := range ELEMENT_TYPE_TO_TYPE_EVALUATORS[elementType] {
			if evaluator(textContainsInsensitive, dtClassNameHasSuffix) {
				return elementType
			}
		}
	}
	return NotFound
}
// NewTypeEvaluators returns a new slice holding a followed by others, in the
// order given. Requiring a as a separate parameter guarantees that the result
// has at least one evaluator.
func NewTypeEvaluators(a TypeEvaluator, others ...TypeEvaluator) []TypeEvaluator {
	combined := make([]TypeEvaluator, 0, len(others)+1)
	combined = append(combined, a)
	return append(combined, others...)
}
// isClass reports whether the entry text contains "class in" or "- class",
// or the <dt> class attribute ends with "class".
func isClass(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	for _, phrase := range []string{"class in", "- class"} {
		if textContainsInsensitive(phrase) {
			return true
		}
	}
	return dtClassNameHasSuffix("class")
}
// isStaticMethod reports whether the entry text contains "static method in"
// or the <dt> class attribute ends with "method".
func isStaticMethod(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	if textContainsInsensitive("static method in") {
		return true
	}
	return dtClassNameHasSuffix("method")
}
// isStaticField reports whether the entry text contains "static variable in"
// or "field in", or the <dt> class attribute ends with "field".
func isStaticField(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	switch {
	case textContainsInsensitive("static variable in"):
		return true
	case textContainsInsensitive("field in"):
		return true
	default:
		return dtClassNameHasSuffix("field")
	}
}
// isConstructor reports whether the entry text contains "constructor" or the
// <dt> class attribute ends with "constructor".
func isConstructor(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	const keyword = "constructor"
	if textContainsInsensitive(keyword) {
		return true
	}
	return dtClassNameHasSuffix(keyword)
}
// isMethod reports whether the entry text contains "method in". The second
// verifier (the class-attribute check) is not consulted.
func isMethod(textContainsInsensitive, _ Verifier) bool {
	matched := textContainsInsensitive("method in")
	return matched
}
// isField reports whether the entry text contains "variable in". The second
// verifier (the class-attribute check) is not consulted.
func isField(textContainsInsensitive, _ Verifier) bool {
	matched := textContainsInsensitive("variable in")
	return matched
}
// isInterface reports whether the entry text contains "interface in" or
// "- interface", or the <dt> class attribute ends with "interface".
func isInterface(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	for _, phrase := range []string{"interface in", "- interface"} {
		if textContainsInsensitive(phrase) {
			return true
		}
	}
	return dtClassNameHasSuffix("interface")
}
// isException reports whether the entry text contains "exception in" or
// "- exception", or the <dt> class attribute ends with "exception".
func isException(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	switch {
	case textContainsInsensitive("exception in"):
		return true
	case textContainsInsensitive("- exception"):
		return true
	default:
		return dtClassNameHasSuffix("exception")
	}
}
// isError reports whether the entry text contains "error in" or "- error",
// or the <dt> class attribute ends with "error".
func isError(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	for _, phrase := range []string{"error in", "- error"} {
		if textContainsInsensitive(phrase) {
			return true
		}
	}
	return dtClassNameHasSuffix("error")
}
// isEnum reports whether the entry text contains "enum in" or "- enum", or
// the <dt> class attribute ends with "enum".
func isEnum(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	switch {
	case textContainsInsensitive("enum in"):
		return true
	case textContainsInsensitive("- enum"):
		return true
	default:
		return dtClassNameHasSuffix("enum")
	}
}
// isTrait reports whether the entry text contains "trait in". The second
// verifier (the class-attribute check) is not consulted.
func isTrait(textContainsInsensitive, _ Verifier) bool {
	matched := textContainsInsensitive("trait in")
	return matched
}
// isNotation reports whether the entry text contains "annotation type" or the
// <dt> class attribute ends with "annotation".
func isNotation(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	if textContainsInsensitive("annotation type") {
		return true
	}
	return dtClassNameHasSuffix("annotation")
}
// isPackage reports whether the entry text contains "package" or the <dt>
// class attribute ends with "package".
func isPackage(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
	const keyword = "package"
	if textContainsInsensitive(keyword) {
		return true
	}
	return dtClassNameHasSuffix(keyword)
}
Loading…
Cancel
Save