1
0
Fork 0
mirror of https://github.com/Luzifer/clean_couch.git synced 2024-12-22 02:31:18 +00:00

Initial version

This commit is contained in:
Knut Ahlers 2015-06-20 23:12:29 +02:00
commit de2e522e1a
4 changed files with 168 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
clean_couch

13
LICENSE Normal file
View file

@ -0,0 +1,13 @@
Copyright 2015 Knut Ahlers <knut@ahlers.me>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

36
README.md Normal file
View file

@ -0,0 +1,36 @@
# Luzifer / clean_couch
[![License: Apache 2.0](http://badge.luzifer.io/v1/badge?color=5d79b5&title=license&text=Apache%202.0)](http://www.apache.org/licenses/LICENSE-2.0)
This utility emerged from the need to delete about 20k documents from a CouchDB database with more than 600k documents. I did not want to delete every document by hand and had no other way to delete documents matching a specific filter.
## Usage
1. Create a view that selects the documents you want to delete. It must use exactly the `emit` line shown in this example:
```javascript
function(doc) {
if (doc.user == "usertodelete") {
emit(doc._rev, null);
}
}
```
2. Execute with parameters
```bash
# ./clean_couch
Usage of ./clean_couch:
--baseurl="http://localhost:5984": BaseURL of your CouchDB instance
--concurrency=20: How many delete requests should get processed concurrently?
--database="": The database containing your view and the data to delete
--view="": The view selecting the data to delete
# ./clean_couch --database=userdata --view=_design/del/_view/usertodelete
```
## Warnings
- If you set the concurrency above 1024 either `clean_couch` or even the CouchDB server might break because of a limit in open file descriptors
- If the database has many views you could overload your server because views need to get recalculated
(My CouchDB server survived a concurrency of 100 with minimal load)

118
main.go Normal file
View file

@ -0,0 +1,118 @@
package main
import (
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/cenkalti/backoff"
"github.com/spf13/pflag"
)
// config bundles the command line settings together with the runtime
// bookkeeping that main and the delete goroutines share.
var config = struct {
	// Settings populated from the command line flags.
	CouchBaseURL, Database, View string
	Routines                     int

	// Internal state: progress counters, the channel on which finished
	// deletes are reported, and the channel used to cap concurrency
	// (created in main once the concurrency setting is known).
	totalNumberOfDocuments, processedDocuments int
	processChannel, concurrencyChannel         chan bool
}{
	processChannel: make(chan bool, 10),
}
// main parses the command line flags, fetches the rows emitted by the
// configured view and deletes every referenced document, running at most
// config.Routines DELETE requests at the same time. Progress is printed once
// per second until every document has been processed.
func main() {
	pflag.StringVar(&config.CouchBaseURL, "baseurl", "http://localhost:5984", "BaseURL of your CouchDB instance")
	pflag.StringVar(&config.Database, "database", "", "The database containing your view and the data to delete")
	pflag.StringVar(&config.View, "view", "", "The view selecting the data to delete")
	pflag.IntVar(&config.Routines, "concurrency", 20, "How many delete requests should get processed concurrently?")
	pflag.Parse()

	if config.Database == "" || config.View == "" {
		pflag.Usage()
		return
	}

	// A negative value would panic in make() below; zero would create an
	// unbuffered channel whose first send blocks forever because the worker
	// that receives from it is only started after that send.
	if config.Routines < 1 {
		fmt.Printf("Concurrency must be at least 1, got %d\n", config.Routines)
		return
	}

	// The view is expected to emit the document revision as key, so "key"
	// is decoded into Rev.
	delData := struct {
		Rows []struct {
			ID  string `json:"id"`
			Rev string `json:"key"`
		} `json:"rows"`
	}{}

	// permanentErr carries failures that retrying cannot fix (malformed URL,
	// client-side HTTP errors); the retry operation returns nil for those so
	// backoff stops immediately.
	var permanentErr error
	err := backoff.Retry(func() error {
		req, err := http.NewRequest("GET", fmt.Sprintf("%s/%s/%s", config.CouchBaseURL, config.Database, config.View), nil)
		if err != nil {
			permanentErr = err
			return nil
		}

		res, err := http.DefaultClient.Do(req)
		if err != nil {
			return err
		}
		defer res.Body.Close()

		switch {
		case res.StatusCode >= 500:
			// Server-side trouble may be transient: retry.
			return fmt.Errorf("unexpected HTTP status %s", res.Status)
		case res.StatusCode != http.StatusOK:
			// An error document would otherwise decode into zero rows and
			// silently look like an empty view.
			permanentErr = fmt.Errorf("unexpected HTTP status %s", res.Status)
			return nil
		}

		return json.NewDecoder(res.Body).Decode(&delData)
	}, backoff.NewExponentialBackOff())
	if err == nil {
		err = permanentErr
	}
	if err != nil {
		fmt.Printf("Tried to get the view but did not succeed: %s\n", err)
		return
	}

	config.totalNumberOfDocuments = len(delData.Rows)
	config.processedDocuments = 0

	// Without any rows no completion message ever arrives, so the progress
	// loop below would never reach its exit condition.
	if config.totalNumberOfDocuments == 0 {
		fmt.Println("The view returned no documents, nothing to delete.")
		return
	}

	config.concurrencyChannel = make(chan bool, config.Routines)

	go func() {
		for _, row := range delData.Rows {
			// Blocks when concurrency channel is full
			config.concurrencyChannel <- true

			go func(finChan chan bool, conChan chan bool, id, rev string) {
				if err := deleteDocument(id, rev); err != nil {
					// Trailing newline keeps the message from being
					// overwritten by the "\r" progress line.
					fmt.Printf("Unable to delete document with ID %s: %s\n", id, err)
				}

				// Increase finished counter
				finChan <- true
				// Remove self from concurrency limit
				<-conChan
			}(config.processChannel, config.concurrencyChannel, row.ID, row.Rev)
		}
	}()

	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-config.processChannel:
			config.processedDocuments++
			if config.processedDocuments == config.totalNumberOfDocuments {
				// Print the final count; the last tick may be up to a second old.
				fmt.Printf("Processed %d of %d documents.\n\n", config.processedDocuments, config.totalNumberOfDocuments)
				return
			}
		case <-ticker.C:
			fmt.Printf("Processed %d of %d documents.\r", config.processedDocuments, config.totalNumberOfDocuments)
		}
	}
}

// deleteDocument issues a DELETE request for revision rev of document id in
// the configured database. Transport errors and 5xx responses are retried with
// an exponential back-off starting at five seconds; other error responses are
// returned without retrying. It returns nil once the document is gone.
//
// NOTE(review): id and rev are inserted into the URL unescaped, as before;
// document IDs containing reserved URL characters will likely need escaping.
func deleteDocument(id, rev string) error {
	bo := backoff.NewExponentialBackOff()
	bo.InitialInterval = 5 * time.Second

	// Set for failures that a retry cannot fix; see the operation below.
	var permanentErr error
	err := backoff.Retry(func() error {
		target := fmt.Sprintf("%s/%s/%s?rev=%s", config.CouchBaseURL, config.Database, id, rev)
		req, err := http.NewRequest("DELETE", target, nil)
		if err != nil {
			permanentErr = err
			return nil
		}

		res, err := http.DefaultClient.Do(req)
		if err != nil {
			return err
		}
		defer res.Body.Close()

		switch {
		case res.StatusCode == http.StatusNotFound:
			// The document no longer exists, e.g. because an earlier attempt
			// succeeded but its response was lost. The goal is reached.
			return nil
		case res.StatusCode >= 500:
			return fmt.Errorf("unexpected HTTP status %s", res.Status)
		case res.StatusCode >= 400:
			// Conflicts, authorization problems etc. will not go away by
			// repeating the same request.
			permanentErr = fmt.Errorf("unexpected HTTP status %s", res.Status)
			return nil
		}

		return nil
	}, bo)
	if err != nil {
		return err
	}

	return permanentErr
}