Awesome
adam-gfa
Graphical Fragment Assembly (GFA) support for ADAM.
Hacking adam-gfa
Install
- JDK 1.8 or later, http://openjdk.java.net
- Apache Maven 3.3.9 or later, http://maven.apache.org
- Apache Spark 3.1.2 or later, built for Scala 2.12, http://spark.apache.org
To build
$ mvn install
Running adam-gfa
Transform GFA 1.0 to generic `Gfa1Record` records in Parquet format
$ spark-submit \
--class com.github.heuermh.adam.gfa.Gfa1ToDataframe \
target/adam-gfa_2.12-${version}.jar \
in.gfa \
out.parquet
Transform GFA 1.0 to specific `Containment`, `Link`, `Path`, `Segment`, and `Traversal` records in Parquet format
$ spark-submit \
--class com.github.heuermh.adam.gfa.Gfa1ToDataframes \
target/adam-gfa_2.12-${version}.jar \
in.gfa \
out
(creates separate `out.containments.parquet`, `out.links.parquet`, `out.paths.parquet`, `out.segments.parquet`, and `out.traversals.parquet` directories)
Graphical Fragment Assembly (GFA) version 1.0 schema in Parquet format
Gfa1Record
message spark_schema {
optional binary recordType (STRING);
optional binary name (STRING);
optional binary sequence (STRING);
optional int32 length;
optional int32 readCount;
optional int32 fragmentCount;
optional int32 kmerCount;
optional binary sequenceChecksum (STRING);
optional binary sequenceUri (STRING);
optional binary stableName (STRING);
optional int32 stableOffset;
optional int32 stableRank;
optional binary id (STRING);
optional group source {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional group target {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional binary overlap (STRING);
optional int32 mappingQuality;
optional int32 mismatchCount;
optional binary pathName (STRING);
optional group segments (LIST) {
repeated group list {
optional group element {
optional binary id (STRING);
optional binary orientation (STRING);
}
}
}
optional group overlaps (LIST) {
repeated group list {
optional binary element (STRING);
}
}
optional int32 ordinal;
optional group container {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional group contained {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional int32 position;
optional group annotations (MAP) {
repeated group key_value {
required binary key (STRING);
optional group value {
optional binary name (STRING);
optional binary type (STRING);
optional binary value (STRING);
}
}
}
}
Containment
message spark_schema {
optional binary id (STRING);
optional group container {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional group contained {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional int32 position;
optional binary overlap (STRING);
optional int32 mismatchCount;
optional int32 readCount;
optional group annotations (MAP) {
repeated group key_value {
required binary key (STRING);
optional group value {
optional binary name (STRING);
optional binary type (STRING);
optional binary value (STRING);
}
}
}
}
Link
message spark_schema {
optional binary id (STRING);
optional group source {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional group target {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional binary overlap (STRING);
optional int32 mappingQuality;
optional int32 mismatchCount;
optional int32 readCount;
optional int32 fragmentCount;
optional int32 kmerCount;
optional group annotations (MAP) {
repeated group key_value {
required binary key (STRING);
optional group value {
optional binary name (STRING);
optional binary type (STRING);
optional binary value (STRING);
}
}
}
}
Path
message spark_schema {
optional binary pathName (STRING);
optional group segments (LIST) {
repeated group list {
optional group element {
optional binary id (STRING);
optional binary orientation (STRING);
}
}
}
optional group overlaps (LIST) {
repeated group list {
optional binary element (STRING);
}
}
optional group annotations (MAP) {
repeated group key_value {
required binary key (STRING);
optional group value {
optional binary name (STRING);
optional binary type (STRING);
optional binary value (STRING);
}
}
}
}
Segment
message spark_schema {
optional binary name (STRING);
optional binary sequence (STRING);
optional int32 length;
optional int32 readCount;
optional int32 fragmentCount;
optional int32 kmerCount;
optional binary sequenceChecksum (STRING);
optional binary sequenceUri (STRING);
optional binary stableName (STRING);
optional int32 stableOffset;
optional int32 stableRank;
optional group annotations (MAP) {
repeated group key_value {
required binary key (STRING);
optional group value {
optional binary name (STRING);
optional binary type (STRING);
optional binary value (STRING);
}
}
}
}
Traversal
message spark_schema {
optional binary id (STRING);
optional binary pathName (STRING);
optional int32 ordinal;
optional group source {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional group target {
optional binary id (STRING);
optional binary orientation (STRING);
}
optional binary overlap (STRING);
optional group annotations (MAP) {
repeated group key_value {
required binary key (STRING);
optional group value {
optional binary name (STRING);
optional binary type (STRING);
optional binary value (STRING);
}
}
}
}