Use pbfparser instead of XML parser in map-creator

This commit is contained in:
Manuel Fuhr 2021-10-09 08:05:52 +02:00
parent 11a9843f41
commit 78f33ee479
6 changed files with 108 additions and 296 deletions

View file

@ -2,11 +2,12 @@ plugins {
id 'java-library'
}
dependencies {
implementation project(':brouter-codec')
implementation project(':brouter-util')
implementation project(':brouter-expressions')
implementation group: 'org.openstreetmap.osmosis', name: 'osmosis-osm-binary', version: '0.48.3'
testImplementation('junit:junit:4.13.1')
}

View file

@ -0,0 +1,258 @@
package btools.mapcreator;
import com.google.protobuf.InvalidProtocolBufferException;
import org.openstreetmap.osmosis.osmbinary.Fileformat;
import org.openstreetmap.osmosis.osmbinary.Osmformat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import btools.util.LongList;
/**
* Converts PBF block data into decoded entities ready to be passed into an Osmosis pipeline. This
* class is designed to be passed into a pool of worker threads to allow multi-threaded decoding.
* <p/>
*
* @author Brett Henderson
*/
public class BPbfBlobDecoder {
private String blobType;
private byte[] rawBlob;
private OsmParser parser;
/**
* Creates a new instance.
* <p/>
*
* @param blobType The type of blob.
* @param rawBlob The raw data of the blob.
* @param listener The listener for receiving decoding results.
*/
public BPbfBlobDecoder(String blobType, byte[] rawBlob, OsmParser parser) {
this.blobType = blobType;
this.rawBlob = rawBlob;
this.parser = parser;
}
public void process() throws Exception {
if ("OSMHeader".equals(blobType)) {
processOsmHeader(readBlobContent());
} else if ("OSMData".equals(blobType)) {
processOsmPrimitives(readBlobContent());
} else {
System.out.println("Skipping unrecognised blob type " + blobType);
}
}
private byte[] readBlobContent() throws IOException {
Fileformat.Blob blob = Fileformat.Blob.parseFrom(rawBlob);
byte[] blobData;
if (blob.hasRaw()) {
blobData = blob.getRaw().toByteArray();
} else if (blob.hasZlibData()) {
Inflater inflater = new Inflater();
inflater.setInput(blob.getZlibData().toByteArray());
blobData = new byte[blob.getRawSize()];
try {
inflater.inflate(blobData);
} catch (DataFormatException e) {
throw new RuntimeException("Unable to decompress PBF blob.", e);
}
if (!inflater.finished()) {
throw new RuntimeException("PBF blob contains incomplete compressed data.");
}
} else {
throw new RuntimeException("PBF blob uses unsupported compression, only raw or zlib may be used.");
}
return blobData;
}
private void processOsmHeader(byte[] data) throws InvalidProtocolBufferException {
Osmformat.HeaderBlock header = Osmformat.HeaderBlock.parseFrom(data);
// Build the list of active and unsupported features in the file.
List<String> supportedFeatures = Arrays.asList("OsmSchema-V0.6", "DenseNodes");
List<String> activeFeatures = new ArrayList<>();
List<String> unsupportedFeatures = new ArrayList<>();
for (String feature : header.getRequiredFeaturesList()) {
if (supportedFeatures.contains(feature)) {
activeFeatures.add(feature);
} else {
unsupportedFeatures.add(feature);
}
}
// We can't continue if there are any unsupported features. We wait
// until now so that we can display all unsupported features instead of
// just the first one we encounter.
if (unsupportedFeatures.size() > 0) {
throw new RuntimeException("PBF file contains unsupported features " + unsupportedFeatures);
}
}
private Map<String, String> buildTags(List<Integer> keys, List<Integer> values, BPbfFieldDecoder fieldDecoder) {
Iterator<Integer> keyIterator = keys.iterator();
Iterator<Integer> valueIterator = values.iterator();
if (keyIterator.hasNext()) {
Map<String, String> tags = new HashMap<>();
while (keyIterator.hasNext()) {
String key = fieldDecoder.decodeString(keyIterator.next());
String value = fieldDecoder.decodeString(valueIterator.next());
tags.put(key, value);
}
return tags;
}
return null;
}
private void processNodes(List<Osmformat.Node> nodes, BPbfFieldDecoder fieldDecoder) {
for (Osmformat.Node node : nodes) {
Map<String, String> tags = buildTags(node.getKeysList(), node.getValsList(), fieldDecoder);
parser.addNode(node.getId(), tags, fieldDecoder.decodeLatitude(node
.getLat()), fieldDecoder.decodeLatitude(node.getLon()));
}
}
private void processNodes(Osmformat.DenseNodes nodes, BPbfFieldDecoder fieldDecoder) {
List<Long> idList = nodes.getIdList();
List<Long> latList = nodes.getLatList();
List<Long> lonList = nodes.getLonList();
Iterator<Integer> keysValuesIterator = nodes.getKeysValsList().iterator();
long nodeId = 0;
long latitude = 0;
long longitude = 0;
for (int i = 0; i < idList.size(); i++) {
// Delta decode node fields.
nodeId += idList.get(i);
latitude += latList.get(i);
longitude += lonList.get(i);
// Build the tags. The key and value string indexes are sequential
// in the same PBF array. Each set of tags is delimited by an index
// with a value of 0.
Map<String, String> tags = null;
while (keysValuesIterator.hasNext()) {
int keyIndex = keysValuesIterator.next();
if (keyIndex == 0) {
break;
}
int valueIndex = keysValuesIterator.next();
if (tags == null) {
tags = new HashMap<>();
}
tags.put(fieldDecoder.decodeString(keyIndex), fieldDecoder.decodeString(valueIndex));
}
parser.addNode(nodeId, tags, ((double) latitude) / 10000000, ((double) longitude) / 10000000);
}
}
private void processWays(List<Osmformat.Way> ways, BPbfFieldDecoder fieldDecoder) {
for (Osmformat.Way way : ways) {
Map<String, String> tags = buildTags(way.getKeysList(), way.getValsList(), fieldDecoder);
// Build up the list of way nodes for the way. The node ids are
// delta encoded meaning that each id is stored as a delta against
// the previous one.
long nodeId = 0;
LongList wayNodes = new LongList(16);
for (long nodeIdOffset : way.getRefsList()) {
nodeId += nodeIdOffset;
wayNodes.add(nodeId);
}
parser.addWay(way.getId(), tags, wayNodes);
}
}
private LongList fromWid;
private LongList toWid;
private LongList viaNid;
private LongList addLong(LongList ll, long l) {
if (ll == null) {
ll = new LongList(1);
}
ll.add(l);
return ll;
}
private LongList buildRelationMembers(
List<Long> memberIds, List<Integer> memberRoles, List<Osmformat.Relation.MemberType> memberTypes,
BPbfFieldDecoder fieldDecoder) {
LongList wayIds = new LongList(16);
fromWid = toWid = viaNid = null;
Iterator<Long> memberIdIterator = memberIds.iterator();
Iterator<Integer> memberRoleIterator = memberRoles.iterator();
Iterator<Osmformat.Relation.MemberType> memberTypeIterator = memberTypes.iterator();
// Build up the list of relation members for the way. The member ids are
// delta encoded meaning that each id is stored as a delta against
// the previous one.
long refId = 0;
while (memberIdIterator.hasNext()) {
Osmformat.Relation.MemberType memberType = memberTypeIterator.next();
refId += memberIdIterator.next();
String role = fieldDecoder.decodeString(memberRoleIterator.next());
if (memberType == Osmformat.Relation.MemberType.WAY) { // currently just waymembers
wayIds.add(refId);
if ("from".equals(role)) fromWid = addLong(fromWid, refId);
if ("to".equals(role)) toWid = addLong(toWid, refId);
}
if (memberType == Osmformat.Relation.MemberType.NODE) { // currently just waymembers
if ("via".equals(role)) viaNid = addLong(viaNid, refId);
}
}
return wayIds;
}
private void processRelations(List<Osmformat.Relation> relations, BPbfFieldDecoder fieldDecoder) {
for (Osmformat.Relation relation : relations) {
Map<String, String> tags = buildTags(relation.getKeysList(), relation.getValsList(), fieldDecoder);
LongList wayIds = buildRelationMembers(relation.getMemidsList(), relation.getRolesSidList(),
relation.getTypesList(), fieldDecoder);
parser.addRelation(relation.getId(), tags, wayIds, fromWid, toWid, viaNid);
}
}
private void processOsmPrimitives(byte[] data) throws InvalidProtocolBufferException {
Osmformat.PrimitiveBlock block = Osmformat.PrimitiveBlock.parseFrom(data);
BPbfFieldDecoder fieldDecoder = new BPbfFieldDecoder(block);
for (Osmformat.PrimitiveGroup primitiveGroup : block.getPrimitivegroupList()) {
processNodes(primitiveGroup.getDense(), fieldDecoder);
processNodes(primitiveGroup.getNodesList(), fieldDecoder);
processWays(primitiveGroup.getWaysList(), fieldDecoder);
processRelations(primitiveGroup.getRelationsList(), fieldDecoder);
}
}
}

View file

@ -0,0 +1,84 @@
package btools.mapcreator;
import java.util.Date;
import org.openstreetmap.osmosis.osmbinary.Osmformat;
/**
* Manages decoding of the lower level PBF data structures.
* <p/>
*
* @author Brett Henderson
* <p/>
*/
public class BPbfFieldDecoder {
private static final double COORDINATE_SCALING_FACTOR = 0.000000001;
private String[] strings;
private int coordGranularity;
private long coordLatitudeOffset;
private long coordLongitudeOffset;
private int dateGranularity;
/**
* Creates a new instance.
* <p/>
*
* @param primitiveBlock The primitive block containing the fields to be decoded.
*/
public BPbfFieldDecoder(Osmformat.PrimitiveBlock primitiveBlock) {
this.coordGranularity = primitiveBlock.getGranularity();
this.coordLatitudeOffset = primitiveBlock.getLatOffset();
this.coordLongitudeOffset = primitiveBlock.getLonOffset();
this.dateGranularity = primitiveBlock.getDateGranularity();
Osmformat.StringTable stringTable = primitiveBlock.getStringtable();
strings = new String[stringTable.getSCount()];
for (int i = 0; i < strings.length; i++) {
strings[i] = stringTable.getS(i).toStringUtf8();
}
}
/**
* Decodes a raw latitude value into degrees.
* <p/>
*
* @param rawLatitude The PBF encoded value.
* @return The latitude in degrees.
*/
public double decodeLatitude(long rawLatitude) {
return COORDINATE_SCALING_FACTOR * (coordLatitudeOffset + (coordGranularity * rawLatitude));
}
/**
* Decodes a raw longitude value into degrees.
* <p/>
*
* @param rawLongitude The PBF encoded value.
* @return The longitude in degrees.
*/
public double decodeLongitude(long rawLongitude) {
return COORDINATE_SCALING_FACTOR * (coordLongitudeOffset + (coordGranularity * rawLongitude));
}
/**
* Decodes a raw timestamp value into a Date.
* <p/>
*
* @param rawTimestamp The PBF encoded timestamp.
* @return The timestamp as a Date.
*/
public Date decodeTimestamp(long rawTimestamp) {
return new Date(dateGranularity * rawTimestamp);
}
/**
* Decodes a raw string into a String.
* <p/>
*
* @param rawString The PBF encoding string.
* @return The string as a String.
*/
public String decodeString(int rawString) {
return strings[rawString];
}
}

View file

@ -1,10 +1,17 @@
package btools.mapcreator;
import org.openstreetmap.osmosis.osmbinary.Fileformat;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import java.util.HashMap;
import java.util.Map;
import btools.util.LongList;
/**
* Parser for OSM data
@ -22,179 +29,113 @@ public class OsmParser extends MapCreatorBase {
NodeListener nListener,
WayListener wListener,
RelationListener rListener) throws Exception {
this.nListener = nListener;
this.wListener = wListener;
this.rListener = rListener;
if (mapFile == null) {
_br = new BufferedReader(new InputStreamReader(System.in));
} else {
if (mapFile.getName().endsWith(".gz")) {
_br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(mapFile))));
} else {
_br = new BufferedReader(new InputStreamReader(new FileInputStream(mapFile)));
}
System.out.println("*** PBF Parsing: " + mapFile);
// once more for testing
int rawBlobCount = 0;
long bytesRead = 0L;
// wait for file to become available
while (!mapFile.exists()) {
System.out.println("--- waiting for " + mapFile + " to become available");
Thread.sleep(10000);
}
long currentSize = mapFile.length();
long currentSizeTime = System.currentTimeMillis();
DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(mapFile)));
for (; ; ) {
String line = _br.readLine();
if (line == null) break;
if (checkNode(line)) continue;
if (checkWay(line)) continue;
if (checkRelation(line)) continue;
if (checkChangeset(line)) continue;
}
if (mapFile != null) {
_br.close();
}
}
private boolean checkNode(String line) throws Exception {
int idx0 = line.indexOf("<node id=\"");
if (idx0 < 0) return false;
idx0 += 10;
int idx1 = line.indexOf('"', idx0);
long nodeId = Long.parseLong(line.substring(idx0, idx1));
int idx2 = line.indexOf(" lat=\"");
if (idx2 < 0) return false;
idx2 += 6;
int idx3 = line.indexOf('"', idx2);
double lat = Double.parseDouble(line.substring(idx2, idx3));
int idx4 = line.indexOf(" lon=\"");
if (idx4 < 0) return false;
idx4 += 6;
int idx5 = line.indexOf('"', idx4);
double lon = Double.parseDouble(line.substring(idx4, idx5));
NodeData n = new NodeData(nodeId, lon, lat);
if (!line.endsWith("/>")) {
// read additional tags
for (; ; ) {
String l2 = _br.readLine();
if (l2 == null) return false;
int i2;
if ((i2 = l2.indexOf("<tag k=\"")) >= 0) { // property-tag
i2 += 8;
int ri2 = l2.indexOf('"', i2);
String key = l2.substring(i2, ri2);
i2 = l2.indexOf(" v=\"", ri2);
if (i2 >= 0) {
i2 += 4;
int ri3 = l2.indexOf('"', i2);
String value = l2.substring(i2, ri3);
n.putTag(key, value);
}
} else if (l2.indexOf("</node>") >= 0) { // end-tag
// continue reading if either more then a 100 MB unread, or the current-size is known for more then 2 Minutes
while (currentSize - bytesRead < 100000000L) {
long newSize = mapFile.length();
if (newSize != currentSize) {
currentSize = newSize;
currentSizeTime = System.currentTimeMillis();
} else if (System.currentTimeMillis() - currentSizeTime > 120000) {
break;
}
}
}
nListener.nextNode(n);
return true;
}
private boolean checkWay(String line) throws Exception {
int idx0 = line.indexOf("<way id=\"");
if (idx0 < 0) return false;
idx0 += 9;
int idx1 = line.indexOf('"', idx0);
long id = Long.parseLong(line.substring(idx0, idx1));
WayData w = new WayData(id);
// read the nodes
for (; ; ) {
String l2 = _br.readLine();
if (l2 == null) return false;
int i2;
if ((i2 = l2.indexOf("<nd ref=\"")) >= 0) { // node reference
i2 += 9;
int ri2 = l2.indexOf('"', i2);
long nid = Long.parseLong(l2.substring(i2, ri2));
w.nodes.add(nid);
} else if ((i2 = l2.indexOf("<tag k=\"")) >= 0) { // property-tag
i2 += 8;
int ri2 = l2.indexOf('"', i2);
String key = l2.substring(i2, ri2);
i2 = l2.indexOf(" v=\"", ri2);
if (i2 >= 0) {
i2 += 4;
int ri3 = l2.indexOf('"', i2);
String value = l2.substring(i2, ri3);
w.putTag(key, value);
if (currentSize - bytesRead < 100000000L) {
System.out.println("--- waiting for more data, currentSize=" + currentSize + " bytesRead=" + bytesRead);
Thread.sleep(10000);
}
} else if (l2.indexOf("</way>") >= 0) { // end-tag
}
int headerLength;
try {
headerLength = dis.readInt();
bytesRead += 4;
} catch (EOFException e) {
break;
}
byte[] headerBuffer = new byte[headerLength];
dis.readFully(headerBuffer);
bytesRead += headerLength;
Fileformat.BlobHeader blobHeader = Fileformat.BlobHeader.parseFrom(headerBuffer);
byte[] blobData = new byte[blobHeader.getDatasize()];
dis.readFully(blobData);
bytesRead += blobData.length;
new BPbfBlobDecoder(blobHeader.getType(), blobData, this).process();
rawBlobCount++;
}
wListener.nextWay(w);
return true;
dis.close();
System.out.println("read raw blobs: " + rawBlobCount);
}
private boolean checkChangeset(String line) throws Exception {
int idx0 = line.indexOf("<changeset id=\"");
if (idx0 < 0) return false;
if (!line.endsWith("/>")) {
int loopcheck = 0;
for (; ; ) {
String l2 = _br.readLine();
if (l2.indexOf("</changeset>") >= 0 || ++loopcheck > 10000) break;
}
public void addNode(long nid, Map<String, String> tags, double lat, double lon) {
NodeData n = new NodeData(nid, lon, lat);
n.setTags((HashMap<String, String>) tags);
try {
nListener.nextNode(n);
} catch (Exception e) {
throw new RuntimeException("error writing node: " + e);
}
return true;
}
private boolean checkRelation(String line) throws Exception {
int idx0 = line.indexOf("<relation id=\"");
if (idx0 < 0) return false;
public void addWay(long wid, Map<String, String> tags, LongList nodes) {
WayData w = new WayData(wid, nodes);
w.setTags((HashMap<String, String>) tags);
idx0 += 14;
int idx1 = line.indexOf('"', idx0);
long rid = Long.parseLong(line.substring(idx0, idx1));
try {
wListener.nextWay(w);
} catch (Exception e) {
throw new RuntimeException("error writing way: " + e);
}
}
RelationData r = new RelationData(rid);
public void addRelation(long rid, Map<String, String> tags, LongList wayIds, LongList fromWid, LongList toWid, LongList viaNid) {
RelationData r = new RelationData(rid, wayIds);
r.setTags((HashMap<String, String>) tags);
// read the nodes
for (; ; ) {
String l2 = _br.readLine();
if (l2 == null) return false;
int i2;
if ((i2 = l2.indexOf("<member type=\"way\" ref=\"")) >= 0) { // node reference
i2 += 24;
int ri2 = l2.indexOf('"', i2);
long wid = Long.parseLong(l2.substring(i2, ri2));
r.ways.add(wid);
} else if ((i2 = l2.indexOf("<tag k=\"")) >= 0) { // property-tag
i2 += 8;
int ri2 = l2.indexOf('"', i2);
String key = l2.substring(i2, ri2);
i2 = l2.indexOf(" v=\"", ri2);
if (i2 >= 0) {
i2 += 4;
int ri3 = l2.indexOf('"', i2);
String value = l2.substring(i2, ri3);
r.putTag(key, value);
try {
rListener.nextRelation(r);
if (fromWid == null || toWid == null || viaNid == null || viaNid.size() != 1) {
// dummy-TR for each viaNid
for (int vi = 0; vi < (viaNid == null ? 0 : viaNid.size()); vi++) {
rListener.nextRestriction(r, 0L, 0L, viaNid.get(vi));
}
} else if (l2.indexOf("</relation>") >= 0) { // end-tag
break;
return;
}
for (int fi = 0; fi < fromWid.size(); fi++) {
for (int ti = 0; ti < toWid.size(); ti++) {
rListener.nextRestriction(r, fromWid.get(fi), toWid.get(ti), viaNid.get(0));
}
}
} catch (Exception e) {
throw new RuntimeException("error writing relation", e);
}
rListener.nextRelation(r);
return true;
}
}

View file

@ -1,12 +1,14 @@
package btools.mapcreator;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import java.io.File;
import java.net.URL;
public class MapcreatorTest {
@Ignore("Fails with PBF parser")
@Test
public void mapcreatorTest() throws Exception {
URL mapurl = this.getClass().getResource("/dreieich.osm.gz");