mirror of
https://github.com/Luzifer/clean_couch.git
synced 2024-12-22 10:41:20 +00:00
Initial version
This commit is contained in:
commit
de2e522e1a
4 changed files with 168 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
clean_couch
|
13
LICENSE
Normal file
13
LICENSE
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
Copyright 2015 Knut Ahlers <knut@ahlers.me>
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
36
README.md
Normal file
36
README.md
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
# Luzifer / clean_couch
|
||||||
|
|
||||||
|
[![License: Apache 2.0](http://badge.luzifer.io/v1/badge?color=5d79b5&title=license&text=Apache%202.0)](http://www.apache.org/licenses/LICENSE-2.0)
|
||||||
|
|
||||||
|
This utility emerged from the need to delete about 20k documents from a CouchDB database with more than 600k documents. I did not want to delete every document by hand and had no other way to delete documents matching a specific filter.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Create a view which selects the documents you want to delete. Its `emit` line must be exactly the one shown in this example:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
function(doc) {
|
||||||
|
if (doc.user == "usertodelete") {
|
||||||
|
emit(doc._rev, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Execute with parameters
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# ./clean_couch
|
||||||
|
Usage of ./clean_couch:
|
||||||
|
--baseurl="http://localhost:5984": BaseURL of your CouchDB instance
|
||||||
|
--concurrency=20: How many delete requests should get processed concurrently?
|
||||||
|
--database="": The database containing your view and the data to delete
|
||||||
|
--view="": The view selecting the data to delete
|
||||||
|
|
||||||
|
# ./clean_couch --database=userdata --view=_design/del/_view/usertodelete
|
||||||
|
```
|
||||||
|
|
||||||
|
## Warnings
|
||||||
|
|
||||||
|
- If you set the concurrency above 1024 either `clean_couch` or even the CouchDB server might break because of a limit in open file descriptors
|
||||||
|
- If the database has many views you could overload your server because views need to get recalculated
|
||||||
|
(My CouchDB server survived a concurrency of 100 with minimal load)
|
118
main.go
Normal file
118
main.go
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/cenkalti/backoff"
|
||||||
|
"github.com/spf13/pflag"
|
||||||
|
)
|
||||||
|
|
||||||
|
// config bundles the settings read from the command line with the
// bookkeeping state shared between the deletion workers and the progress
// loop in main.
var config = struct {
	// Settings populated from the command line flags
	CouchBaseURL, Database, View string
	Routines                     int

	// Internal state which is not exposed as a flag
	totalNumberOfDocuments, processedDocuments int
	processChannel, concurrencyChannel         chan bool
}{
	// Workers report every finished document on this channel
	processChannel: make(chan bool, 10),
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
pflag.StringVar(&config.CouchBaseURL, "baseurl", "http://localhost:5984", "BaseURL of your CouchDB instance")
|
||||||
|
pflag.StringVar(&config.Database, "database", "", "The database containing your view and the data to delete")
|
||||||
|
pflag.StringVar(&config.View, "view", "", "The view selecting the data to delete")
|
||||||
|
pflag.IntVar(&config.Routines, "concurrency", 20, "How many delete requests should get processed concurrently?")
|
||||||
|
pflag.Parse()
|
||||||
|
|
||||||
|
if config.Database == "" || config.View == "" {
|
||||||
|
pflag.Usage()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
delData := struct {
|
||||||
|
Rows []struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Rev string `json:"key"`
|
||||||
|
} `json:"rows"`
|
||||||
|
}{}
|
||||||
|
|
||||||
|
err := backoff.Retry(func() error {
|
||||||
|
req, _ := http.NewRequest("GET", fmt.Sprintf("%s/%s/%s", config.CouchBaseURL, config.Database, config.View), nil)
|
||||||
|
res, err := http.DefaultClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer res.Body.Close()
|
||||||
|
|
||||||
|
if err := json.NewDecoder(res.Body).Decode(&delData); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}, backoff.NewExponentialBackOff())
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Tried to get the view but did not succeed: %s", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
config.totalNumberOfDocuments = len(delData.Rows)
|
||||||
|
config.processedDocuments = 0
|
||||||
|
|
||||||
|
config.concurrencyChannel = make(chan bool, config.Routines)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for _, row := range delData.Rows {
|
||||||
|
// Blocks when concurrency channel is full
|
||||||
|
config.concurrencyChannel <- true
|
||||||
|
|
||||||
|
go func(finChan chan bool, conChan chan bool, id, rev string) {
|
||||||
|
// Retry deletes
|
||||||
|
bo := backoff.NewExponentialBackOff()
|
||||||
|
bo.InitialInterval = 5 * time.Second
|
||||||
|
|
||||||
|
err := backoff.Retry(func() error {
|
||||||
|
url := fmt.Sprintf("%s/%s/%s?rev=%s", config.CouchBaseURL, config.Database, id, rev)
|
||||||
|
req, _ := http.NewRequest("DELETE", url, nil)
|
||||||
|
res, err := http.DefaultClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
res.Body.Close()
|
||||||
|
return nil
|
||||||
|
}, bo)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Unable to delete document with ID %s", id)
|
||||||
|
}
|
||||||
|
// Increase finished counter
|
||||||
|
finChan <- true
|
||||||
|
|
||||||
|
// Remove self from concurrency limit
|
||||||
|
<-conChan
|
||||||
|
}(config.processChannel, config.concurrencyChannel, row.ID, row.Rev)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
ticker := time.NewTicker(time.Second)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-config.processChannel:
|
||||||
|
config.processedDocuments++
|
||||||
|
if config.processedDocuments == config.totalNumberOfDocuments {
|
||||||
|
fmt.Print("\n\n")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case <-ticker.C:
|
||||||
|
fmt.Printf("Processed %d of %d documents.\r", config.processedDocuments, config.totalNumberOfDocuments)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in a new issue