Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement a car convert subcommand #294

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
47 changes: 46 additions & 1 deletion cmd/car/car.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,38 @@ func main1() int {
Name: "car",
Usage: "Utility for working with car files",
Commands: []*cli.Command{
{
Name: "convert",
Usage: "Convert a car file to given codec",
Aliases: []string{"con"},
Action: ConvertCar,
Flags: []cli.Flag{
&cli.StringFlag{
Name: "selector",
Aliases: []string{"s"},
Usage: "A selector over the dag",
},
},
},
{
Name: "concatenate",
Usage: "Concatenate car files",
Aliases: []string{"cat"},
Action: CatCar,
Flags: []cli.Flag{
&cli.StringFlag{
Name: "file",
Aliases: []string{"f", "output", "o"},
Usage: "The car file to write to",
TakesFile: true,
},
&cli.IntFlag{
Name: "version",
Value: 2,
Usage: "Write output as a v1 or v2 format car",
},
},
},
{
Name: "create",
Usage: "Create a car file",
Expand Down Expand Up @@ -109,6 +141,19 @@ func main1() int {
},
},
},
{
Name: "import",
Usage: "Import a block into a car file",
Action: ImportCar,
Flags: []cli.Flag{
&cli.StringFlag{
Name: "codec",
Aliases: []string{"c"},
Usage: "The codec the block data should be interpreted with",
Value: multicodec.DagJson.String(),
},
},
},
{
Name: "index",
Aliases: []string{"i"},
Expand All @@ -130,7 +175,7 @@ func main1() int {
},
{
Name: "list",
Aliases: []string{"l"},
Aliases: []string{"l", "ls"},
Usage: "List the CIDs in a car",
Action: ListCar,
Flags: []cli.Flag{
Expand Down
79 changes: 79 additions & 0 deletions cmd/car/concatenate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package main

import (
"fmt"
"io"
"os"

"github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/blockstore"
"github.com/urfave/cli/v2"
)

// CatCar concatenates the blocks from a set of source car files into a
// combined destination car file.
//
// The positional arguments are the source car paths; the destination path is
// taken from the "file" flag and the output format (1 or 2) from the
// "version" flag. The roots of the destination car are the roots of the last
// specified source car.
//
// An error is returned when no source or no destination is given, when the
// version is not 1 or 2, or when any source cannot be read or any block cannot
// be written.
func CatCar(c *cli.Context) error {
	if c.Args().Len() == 0 {
		return fmt.Errorf("at least one source file must be specified")
	}

	if !c.IsSet("file") {
		return fmt.Errorf("a file destination must be specified")
	}

	options := []car.Option{}
	switch c.Int("version") {
	case 1:
		options = []car.Option{blockstore.WriteAsCarV1(true)}
	case 2:
		// already the default
	default:
		return fmt.Errorf("invalid CAR version %d", c.Int("version"))
	}

	// Peek at the roots of the final source; they become the destination roots.
	lst := c.Args().Get(c.Args().Len() - 1)
	lstStore, err := blockstore.OpenReadOnly(lst)
	if err != nil {
		return err
	}
	roots, err := lstStore.Roots()
	// Close before inspecting the error so the store is released on both paths.
	// The close error is ignored: the store was only read from.
	_ = lstStore.Close()
	if err != nil {
		return err
	}

	cdest, err := blockstore.OpenReadWrite(c.String("file"), roots, options...)
	if err != nil {
		return err
	}

	for _, src := range c.Args().Slice() {
		if err := catCarSource(c, cdest, src); err != nil {
			return err
		}
	}

	return cdest.Finalize()
}

// catCarSource copies every block of the car file at path src into dst.
// The source file is closed on every return path.
func catCarSource(c *cli.Context, dst *blockstore.ReadWrite, src string) (err error) {
	f, err := os.Open(src)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a close failure only when nothing else went wrong.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	blkRdr, err := car.NewBlockReader(f)
	if err != nil {
		return fmt.Errorf("reading %q: %w", src, err)
	}
	for {
		blk, nextErr := blkRdr.Next()
		if nextErr == io.EOF {
			return nil
		}
		if nextErr != nil {
			// Check the error before using blk: it is not valid on failure.
			return fmt.Errorf("reading %q: %w", src, nextErr)
		}
		if putErr := dst.Put(c.Context, blk); putErr != nil {
			return putErr
		}
	}
}
251 changes: 251 additions & 0 deletions cmd/car/convert.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
package main

import (
"bytes"
"fmt"
"io"
"os"

blocks "github.com/ipfs/go-block-format"
"github.com/ipfs/go-cid"
ipfsbs "github.com/ipfs/go-ipfs-blockstore"
"github.com/ipld/go-car/v2/blockstore"
dagpb "github.com/ipld/go-codec-dagpb"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/datamodel"
"github.com/ipld/go-ipld-prime/linking"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
basicnode "github.com/ipld/go-ipld-prime/node/basic"
"github.com/ipld/go-ipld-prime/storage/memstore"
"github.com/ipld/go-ipld-prime/traversal"
"github.com/ipld/go-ipld-prime/traversal/selector"
selectorParser "github.com/ipld/go-ipld-prime/traversal/selector/parse"
"github.com/multiformats/go-multicodec"
"github.com/multiformats/go-multihash"
"github.com/urfave/cli/v2"
)

// children records the conversion state of a single block in the work map
// built by ConvertCar.
type children struct {
	// t counts how many entries of old have had their converted CID
	// recorded in new.
	t int
	// done is set once the block itself has been converted and stored.
	done bool
	// old holds the CIDs linked from the original block; new holds the
	// converted CID for each of them at the same index.
	old, new []cid.Cid
}

// proxyCid builds a placeholder CIDv1 for the given link prototype.
//
// The placeholder uses the prototype's codec and the digest of empty input
// under the prototype's multihash type, so it has the same length as a real
// CID produced with that prototype and can later be patched with the actual
// root. cid.Undef is returned together with the error if the hasher cannot be
// obtained or the digest cannot be encoded.
func proxyCid(proto cidlink.LinkPrototype) (cid.Cid, error) {
	h, err := multihash.GetHasher(proto.MhType)
	if err != nil {
		return cid.Undef, err
	}

	// Nothing is written to the hasher: only the digest length matters here.
	mh, err := multihash.Encode(h.Sum([]byte{}), proto.MhType)
	if err != nil {
		return cid.Undef, err
	}

	return cid.NewCidV1(uint64(proto.Codec), mh), nil
}

// ConvertCar re-writes the blocks in a car to a specified codec.
//
// Usage: convert <source> <destination> [codec]
//
// The source car must have exactly one root. The DAG under that root is
// traversed (with the selector given by the "selector" flag, or a
// match-all-recursively selector by default), and every visited block is
// re-encoded with the requested codec (dag-json when none is given) and a
// sha2-256 CIDv1, with links rewritten to point at the converted children.
// Any existing file at the destination path is removed first.
//
// An error is returned when the arguments are incomplete, the codec name is
// unknown, the source cannot be read, or a visited block links to a block
// that was not visited (so no converted CID exists for that link).
//
// Note: the destination car is currently written with a placeholder root
// (see proxyCid); the real converted root is not yet patched in.
func ConvertCar(c *cli.Context) error {
	if c.Args().Len() < 2 {
		return fmt.Errorf("Usage: convert <source> <destination> [codec]")
	}
	source, output := c.Args().Get(0), c.Args().Get(1)

	// Resolve the target codec up front so a bad name fails before any file
	// is touched, instead of silently falling back to the default.
	convertTo := multicodec.DagJson
	if c.Args().Len() > 2 {
		if name := c.Args().Get(2); name != "" {
			found := false
			for _, candidate := range multicodec.KnownCodes() {
				if candidate.String() == name {
					convertTo = candidate
					found = true
					break
				}
			}
			if !found {
				return fmt.Errorf("unknown codec %q", name)
			}
		}
	}

	bs, err := blockstore.OpenReadOnly(source)
	if err != nil {
		return err
	}
	// The close error is ignored: the source is only read from.
	defer func() { _ = bs.Close() }()

	roots, err := bs.Roots()
	if err != nil {
		return err
	}
	if len(roots) != 1 {
		return fmt.Errorf("car file does not have exactly one root, dag root must be specified explicitly")
	}
	rootCid := roots[0]

	sel := selectorParser.CommonSelector_MatchAllRecursively
	if c.IsSet("selector") {
		sel, err = selectorParser.ParseJSONSelector(c.String("selector"))
		if err != nil {
			return err
		}
	}
	linkVisitOnlyOnce := !c.IsSet("selector") // if using a custom selector, this isn't as safe

	s, err := selector.CompileSelector(sel)
	if err != nil {
		return err
	}
	xar, err := selector.CompileSelector(selectorParser.CommonSelector_ExploreAllRecursively)
	if err != nil {
		return err
	}

	proto := cidlink.LinkPrototype{
		Prefix: cid.NewPrefixV1(uint64(convertTo), multihash.SHA2_256),
	}
	p, err := proxyCid(proto)
	if err != nil {
		return err
	}

	// Only discard an existing destination once the inputs have been validated.
	if err := os.Remove(output); err != nil && !os.IsNotExist(err) {
		return err
	}
	outStore, err := blockstore.OpenReadWrite(output, []cid.Cid{p}, blockstore.AllowDuplicatePuts(false))
	if err != nil {
		return err
	}
	outls := cidlink.DefaultLinkSystem()
	outls.TrustedStorage = true
	outls.StorageWriteOpener = func(lc linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) {
		buf := bytes.NewBuffer(nil)
		return buf, func(l datamodel.Link) error {
			cl, ok := l.(cidlink.Link)
			if !ok {
				return fmt.Errorf("unknown link type: %T", l)
			}
			blk, err := blocks.NewBlockWithCid(buf.Bytes(), cl.Cid)
			if err != nil {
				return err
			}
			return outStore.Put(lc.Ctx, blk)
		}, nil
	}

	workMap := make(map[cid.Cid]*children)
	tempStore := memstore.Store{}

	// Step 1: traverse blocks into tempstore. populate workmap.
	ls := cidlink.DefaultLinkSystem()
	ls.TrustedStorage = true
	ls.StorageReadOpener = func(_ linking.LinkContext, l datamodel.Link) (io.Reader, error) {
		cl, ok := l.(cidlink.Link)
		if !ok {
			return nil, fmt.Errorf("unknown link type: %T", l)
		}
		blk, err := bs.Get(c.Context, cl.Cid)
		if err != nil {
			if err == ipfsbs.ErrNotFound {
				return nil, traversal.SkipMe{}
			}
			return nil, err
		}
		if err := tempStore.Put(c.Context, cl.Cid.String(), blk.RawData()); err != nil {
			return nil, err
		}
		// Blocks are re-loaded in steps 2 and 3; never clobber the state
		// already recorded for a block.
		if _, seen := workMap[cl.Cid]; !seen {
			workMap[cl.Cid] = &children{}
		}
		return bytes.NewBuffer(blk.RawData()), nil
	}

	nsc := func(lnk datamodel.Link, lctx ipld.LinkContext) (datamodel.NodePrototype, error) {
		if lnk, ok := lnk.(cidlink.Link); ok && lnk.Cid.Prefix().Codec == uint64(multicodec.DagPb) {
			return dagpb.Type.PBNode, nil
		}
		return basicnode.Prototype.Any, nil
	}

	// load reads the block for k from the source car using the prototype
	// chosen by nsc (which never returns an error).
	load := func(k cid.Cid) (datamodel.Node, error) {
		lnk := cidlink.Link{Cid: k}
		ns, _ := nsc(lnk, ipld.LinkContext{})
		return ls.Load(ipld.LinkContext{}, lnk, ns)
	}

	rootNode, err := load(rootCid)
	if err != nil {
		return err
	}

	traversalProgress := traversal.Progress{
		Cfg: &traversal.Config{
			LinkSystem:                     ls,
			LinkTargetNodePrototypeChooser: nsc,
			LinkVisitOnlyOnce:              linkVisitOnlyOnce,
		},
	}

	err = traversalProgress.WalkAdv(rootNode, s, func(traversal.Progress, datamodel.Node, traversal.VisitReason) error { return nil })
	if err != nil {
		return err
	}

	// Step 2: traverse workmap and load blocks to get old children.
	for blkCid := range workMap {
		node, err := load(blkCid)
		if err != nil {
			return err
		}
		old := make([]cid.Cid, 0)
		err = traversal.WalkLocal(node, func(_ traversal.Progress, n datamodel.Node) error {
			if n.Kind() != datamodel.Kind_Link {
				return nil
			}
			nlk, err := n.AsLink()
			if err != nil {
				return err
			}
			cl, ok := nlk.(cidlink.Link)
			if !ok {
				return fmt.Errorf("unknown link type: %T", nlk)
			}
			old = append(old, cl.Cid)
			return nil
		})
		if err != nil {
			return err
		}
		workMap[blkCid] = &children{t: 0, done: false, old: old, new: make([]cid.Cid, len(old))}
	}

	// Step 3: for nodes with no unconverted children, transform the node, and convert.
	done := 0
	for done < len(workMap) {
		progressed := false
		for blkCid, v := range workMap {
			if v.done || v.t != len(v.old) {
				continue
			}
			oldRoot, err := load(blkCid)
			if err != nil {
				return err
			}
			var newRoot ipld.Node
			if len(v.old) == 0 {
				// shortcut on leaf nodes.
				newRoot = oldRoot
			} else {
				// Step 3.1: transform the node using old->new map
				newRoot, err = traversal.WalkTransforming(oldRoot, xar, func(_ traversal.Progress, n datamodel.Node) (datamodel.Node, error) {
					if n.Kind() != datamodel.Kind_Link {
						return n, nil
					}
					nlk, err := n.AsLink()
					if err != nil {
						return nil, err
					}
					cl, ok := nlk.(cidlink.Link)
					if !ok {
						return nil, fmt.Errorf("unknown link type: %T", nlk)
					}
					oldCid := cl.Cid
					for i, candidate := range v.old {
						if candidate.Equals(oldCid) {
							return basicnode.NewLink(cidlink.Link{Cid: v.new[i]}), nil
						}
					}
					return nil, fmt.Errorf("could not find link %s in workmap: %v", oldCid, v.old)
				})
				if err != nil {
					return err
				}
			}
			// Step 3.2: serialize into output datastore
			newLnk, err := outls.Store(ipld.LinkContext{}, proto, newRoot)
			if err != nil {
				return err
			}
			newCl, ok := newLnk.(cidlink.Link)
			if !ok {
				return fmt.Errorf("unknown link type: %T", newLnk)
			}
			newCid := newCl.Cid

			// Step 3.3: update workmap indicating parents should transform this child.
			for _, parent := range workMap {
				for i, o := range parent.old {
					if o.Equals(blkCid) {
						parent.new[i] = newCid
						parent.t++
					}
				}
			}

			v.done = true
			done++
			progressed = true
		}
		if !progressed {
			// Every remaining block is waiting on a link whose target never
			// entered the work map (missing from the source car, or not
			// reached by the selector); looping again would never terminate.
			return fmt.Errorf("unable to convert %d of %d blocks: they link to blocks that were not visited by the traversal", len(workMap)-done, len(workMap))
		}
	}

	// todo: fix up root cid
	return outStore.Finalize()
}