aboutsummaryrefslogtreecommitdiffstats
path: root/home/fast-p/main.go
diff options
context:
space:
mode:
Diffstat (limited to 'home/fast-p/main.go')
-rw-r--r--home/fast-p/main.go147
1 files changed, 147 insertions, 0 deletions
diff --git a/home/fast-p/main.go b/home/fast-p/main.go
new file mode 100644
index 0000000..409c2c2
--- /dev/null
+++ b/home/fast-p/main.go
@@ -0,0 +1,147 @@
+package main
+
+import (
+ "bufio"
+ "encoding/hex"
+ "flag"
+ "fmt"
+ "github.com/boltdb/bolt"
+ "github.com/cespare/xxhash"
+ "github.com/mitchellh/go-homedir"
+ "io"
+ "log"
+ "os"
+ "os/exec"
+ "path/filepath"
+)
+
+func hash_file_xxhash(filePath string) (string, error) {
+ var returnMD5String string
+ file, err := os.Open(filePath)
+ if err != nil {
+ return returnMD5String, err
+ }
+ defer file.Close()
+ hash := xxhash.New()
+ if _, err := io.Copy(hash, file); err != nil {
+ return returnMD5String, err
+ }
+ hashInBytes := hash.Sum(nil)[:]
+ returnMD5String = hex.EncodeToString(hashInBytes)
+ return returnMD5String, nil
+
+}
+
+func main() {
+ flag.Usage = func() {
+ fmt.Printf(`Usage: fast-p [OPTIONS]
+ Reads a list of PDF filenames from STDIN and returns a list of null-byte
+ separated items of the form
+ filename[TAB]text
+ where "text" is the text extracted from the first two pages of the PDF
+ by pdftotext and [TAB] denotes a tab character "\t".
+
+ Common usage of this tool is to pipe the result to FZF with a command in
+ your .bashrc as explained in https://github.com/bellecp/fast-p.
+
+
+`)
+ flag.PrintDefaults()
+ }
+ version := flag.Bool("version", false, "Display program version")
+ clearCache := flag.Bool("clear-cache", false, "Delete cache file located at: \n~/.cache/fast-p-pdftotext-output/fast-p_cached_pdftotext_output.db")
+ flag.Parse()
+
+ if *version != false {
+ fmt.Printf("v.0.2.5 \nhttps://github.com/bellecp/fast-p\n")
+ os.Exit(0)
+ }
+
+ if *clearCache != false {
+ removePath, err := homedir.Expand("~/.cache/fast-p-pdftotext-output/fast-p_cached_pdftotext_output.db")
+ if err != nil {
+ log.Fatal(err)
+ os.Exit(1)
+ }
+ os.Remove(removePath)
+ os.Exit(0)
+ }
+
+ // Create ~/.cache folder if does not exist
+ // https://stackoverflow.com/questions/37932551/mkdir-if-not-exists-using-golang
+ cachePath, err := homedir.Expand("~/.cache/fast-p-pdftotext-output/")
+ os.MkdirAll(cachePath, os.ModePerm)
+
+ // open BoltDB cache database
+ scanner := bufio.NewScanner(os.Stdin)
+ boltDbFilepath := filepath.Join(cachePath, "fast-p_cached_pdftotext_output.db")
+ if err != nil {
+ log.Fatal(err)
+ }
+ db, err := bolt.Open(boltDbFilepath, 0600, nil)
+ bucketName := "fast-p_bucket_for_cached_pdftotext_output"
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer db.Close()
+
+ nullByte := "\u0000"
+
+ db.Update(func(tx *bolt.Tx) error {
+ _, err := tx.CreateBucketIfNotExists([]byte(bucketName))
+ if err != nil {
+ return fmt.Errorf("create bucket: %s", err)
+ }
+ return nil
+ })
+
+ missing := make(map[string]string)
+ alreadySeen := make(map[string]bool)
+
+ for scanner.Scan() {
+ filepath := scanner.Text()
+ hash, err := hash_file_xxhash(filepath)
+ if alreadySeen[hash] != true {
+ alreadySeen[hash] = true
+ if err != nil {
+ log.Println("err", hash)
+ }
+ var content string
+ found := false
+ err2 := db.View(func(tx *bolt.Tx) error {
+ b := tx.Bucket([]byte(bucketName))
+ v := b.Get([]byte(hash))
+ if v != nil {
+ found = true
+ content = string(v)
+ }
+ return nil
+ })
+ if err2 != nil {
+ log.Println(err2)
+ }
+ if found == true {
+ fmt.Println(filepath + "\t" + content + nullByte)
+ } else {
+ missing[hash] = filepath
+ }
+ }
+ }
+ for hash, filepath := range missing {
+ cmd := exec.Command("pdftotext", "-l", "2", filepath, "-")
+ out, err := cmd.CombinedOutput()
+ content := string(out)
+ if err != nil {
+ log.Println(err)
+ }
+ fmt.Println(filepath + "\t" + content + nullByte)
+ db.Update(func(tx *bolt.Tx) error {
+ b := tx.Bucket([]byte(bucketName))
+ err := b.Put([]byte(hash), []byte(content))
+ if err != nil {
+ fmt.Println(err)
+ }
+ return nil
+ })
+ }
+}