mirror of
https://github.com/Luzifer/clean_couch.git
synced 2025-01-01 23:51:23 +00:00
Initial version
This commit is contained in:
commit
de2e522e1a
4 changed files with 168 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
clean_couch
|
13
LICENSE
Normal file
13
LICENSE
Normal file
|
@ -0,0 +1,13 @@
|
|||
Copyright 2015 Knut Ahlers <knut@ahlers.me>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
36
README.md
Normal file
36
README.md
Normal file
|
@ -0,0 +1,36 @@
|
|||
# Luzifer / clean_couch
|
||||
|
||||
[![License: Apache 2.0](http://badge.luzifer.io/v1/badge?color=5d79b5&title=license&text=Apache%202.0)](http://www.apache.org/licenses/LICENSE-2.0)
|
||||
|
||||
This utility emerged from the need to delete about 20k documents from a CouchDB database with more than 600k documents. I did not want to delete every document by hand and had no other way to delete documents matching a specific filter.
|
||||
|
||||
## Usage
|
||||
|
||||
1. Create a view that selects the documents you want to delete. The view must use exactly the `emit` line shown in this example (the document revision as key, `null` as value):
|
||||
|
||||
```javascript
|
||||
function(doc) {
|
||||
if (doc.user == "usertodelete") {
|
||||
emit(doc._rev, null);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. Execute with parameters
|
||||
|
||||
```bash
|
||||
# ./clean_couch
|
||||
Usage of ./clean_couch:
|
||||
--baseurl="http://localhost:5984": BaseURL of your CouchDB instance
|
||||
--concurrency=20: How many delete requests should get processed concurrently?
|
||||
--database="": The database containing your view and the data to delete
|
||||
--view="": The view selecting the data to delete
|
||||
|
||||
# ./clean_couch --database=userdata --view=_design/del/_view/usertodelete
|
||||
```
|
||||
|
||||
## Warnings
|
||||
|
||||
- If you set the concurrency above 1024 either `clean_couch` or even the CouchDB server might break because of a limit in open file descriptors
|
||||
- If the database has many views you could overload your server because views need to get recalculated
|
||||
(My CouchDB server survived a concurrency of 100 with minimal load)
|
118
main.go
Normal file
118
main.go
Normal file
|
@ -0,0 +1,118 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/cenkalti/backoff"
|
||||
"github.com/spf13/pflag"
|
||||
)
|
||||
|
||||
// config bundles the settings read from the command line with the
// bookkeeping state shared between main and the delete workers.
var config = struct {
	// Values filled in by the command line flags.
	CouchBaseURL, Database, View string
	Routines                     int

	// Private storage: progress counters, the channel on which workers
	// announce a finished document and the channel used as a limit for the
	// number of concurrently running workers.
	totalNumberOfDocuments, processedDocuments int
	processChannel, concurrencyChannel         chan bool
}{
	processChannel: make(chan bool, 10),
}
|
||||
|
||||
func main() {
|
||||
pflag.StringVar(&config.CouchBaseURL, "baseurl", "http://localhost:5984", "BaseURL of your CouchDB instance")
|
||||
pflag.StringVar(&config.Database, "database", "", "The database containing your view and the data to delete")
|
||||
pflag.StringVar(&config.View, "view", "", "The view selecting the data to delete")
|
||||
pflag.IntVar(&config.Routines, "concurrency", 20, "How many delete requests should get processed concurrently?")
|
||||
pflag.Parse()
|
||||
|
||||
if config.Database == "" || config.View == "" {
|
||||
pflag.Usage()
|
||||
return
|
||||
}
|
||||
|
||||
delData := struct {
|
||||
Rows []struct {
|
||||
ID string `json:"id"`
|
||||
Rev string `json:"key"`
|
||||
} `json:"rows"`
|
||||
}{}
|
||||
|
||||
err := backoff.Retry(func() error {
|
||||
req, _ := http.NewRequest("GET", fmt.Sprintf("%s/%s/%s", config.CouchBaseURL, config.Database, config.View), nil)
|
||||
res, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
if err := json.NewDecoder(res.Body).Decode(&delData); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}, backoff.NewExponentialBackOff())
|
||||
if err != nil {
|
||||
fmt.Printf("Tried to get the view but did not succeed: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
config.totalNumberOfDocuments = len(delData.Rows)
|
||||
config.processedDocuments = 0
|
||||
|
||||
config.concurrencyChannel = make(chan bool, config.Routines)
|
||||
|
||||
go func() {
|
||||
for _, row := range delData.Rows {
|
||||
// Blocks when concurrency channel is full
|
||||
config.concurrencyChannel <- true
|
||||
|
||||
go func(finChan chan bool, conChan chan bool, id, rev string) {
|
||||
// Retry deletes
|
||||
bo := backoff.NewExponentialBackOff()
|
||||
bo.InitialInterval = 5 * time.Second
|
||||
|
||||
err := backoff.Retry(func() error {
|
||||
url := fmt.Sprintf("%s/%s/%s?rev=%s", config.CouchBaseURL, config.Database, id, rev)
|
||||
req, _ := http.NewRequest("DELETE", url, nil)
|
||||
res, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
res.Body.Close()
|
||||
return nil
|
||||
}, bo)
|
||||
if err != nil {
|
||||
fmt.Printf("Unable to delete document with ID %s", id)
|
||||
}
|
||||
// Increase finished counter
|
||||
finChan <- true
|
||||
|
||||
// Remove self from concurrency limit
|
||||
<-conChan
|
||||
}(config.processChannel, config.concurrencyChannel, row.ID, row.Rev)
|
||||
}
|
||||
}()
|
||||
|
||||
ticker := time.NewTicker(time.Second)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-config.processChannel:
|
||||
config.processedDocuments++
|
||||
if config.processedDocuments == config.totalNumberOfDocuments {
|
||||
fmt.Print("\n\n")
|
||||
return
|
||||
}
|
||||
case <-ticker.C:
|
||||
fmt.Printf("Processed %d of %d documents.\r", config.processedDocuments, config.totalNumberOfDocuments)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in a new issue